1 Install packages and functions

devtools::install_github("sysbiomed/glmSparseNet", ref = "prognostic.index")
## Skipping install of 'glmSparseNet' from a github remote, the SHA1 (67cd612b) has not changed since last install.
##   Use `force = TRUE` to force installation

1.1 Required libraries

library(SummarizedExperiment)
library(dplyr)
library(DT)
library(tidyverse)
library(futile.logger)
library(loose.rock)
library(survival)
library(GGally)
library(ggplot2)
library(survminer)
library(glmnet)
library(glmSparseNet)
library(gplots)
library(readr)
library(maftools)
library(RTCGAToolbox)
library(edgeR)
#install.packages("biospear")
library(biospear)

1.2 Functions

twiner_glmnet <- function (x,y,alpha, w) {
                      cv.glmnet(as.matrix(x),as.matrix(y), 
                               family = 'cox',foldid = my_foldid, penalty.factor = w, 
                               alpha = alpha,
                               nlambda = 100,
                               #lambda.min.ratio = 10^(-2),
                               nfolds= 10)
}

twiner_coefs <- function (cv.fit){
  coef(cv.fit, s = 'lambda.min')[,1] %>% { .[. != 0]}
}


twiner_genes <- function (cv.fit) {
  b <- which(cv.fit$glmnet.fit$beta[,which(cv.fit$cvm == min(cv.fit$cvm))] != 0)
  return (names(b))
}

train <- function (coefs) {
  separate2GroupsCox(as.vector(coefs), 
                               xdata.train[, names(coefs)], 
                               ydata.train, 
                               plot.title = 'Train Set TCox', legend.outside = FALSE) 
}

test <- function (coefs) {
  separate2GroupsCox(as.vector(coefs), 
                               xdata.test[, names(coefs)], 
                               ydata.test, 
                               plot.title = 'Test Set TCox', legend.outside = FALSE) 
}

all <- function (coefs) {
  separate2GroupsCox(as.vector(coefs), 
                               xdata[, names(coefs)], 
                               ydata, 
                               plot.title = 'all Set TCox', legend.outside = FALSE) 
}

2 Download and prepare dataframes from TCGA - RTCGAtoolbox package

2.1 Colorectal samples RNASeq data

Download COAD dataset

coadData <- getFirehoseData(dataset="COAD", runDate="20150402", 
    forceDownload=TRUE, clinical=TRUE, RNASeq2GeneNorm=TRUE) 

rnaseq1 <- coadData@RNASeq2GeneNorm[[2]]
rnaseq1 <- rnaseq1@DataMatrix
rnaseq1 <- t(rnaseq1)
rnaseq1 <- as.data.frame(rnaseq1)


rnaseq1$names <- rownames(rnaseq1)


dim(rnaseq1) #328
## [1]   328 20502
#keep only tumor samples
tumor <- rnaseq1 %>%
  filter(str_detect(names, "01A"))

dim(tumor) #287
## [1]   282 20502
tumor$names <- str_sub(tumor$names, 1, str_length(tumor$names)-16) #put names in the right format
#remove duplicated rows
tumor <- tumor[!duplicated(tumor$names), ]
rownames(tumor) <- tumor$names
tumor <- tumor[,-20502]

normal <- rnaseq1 %>%
  filter(!str_detect(names, "01A"))

dim(normal) #287
## [1]    46 20502
normal$names <- str_sub(normal$names, 1, str_length(normal$names)-16) #put names in the right format
normal <- normal[!duplicated(normal$names), ]
rownames(normal) <- normal$names
normal <- normal[,-20502]

Download READ dataset

readData <- getFirehoseData(dataset="READ", runDate="20150402", 
    forceDownload=TRUE, clinical=TRUE, RNASeq2GeneNorm=TRUE) 

rnaseq2 <- readData@RNASeq2GeneNorm[[2]]
rnaseq2 <- rnaseq2@DataMatrix
rnaseq2 <- t(rnaseq2)
rnaseq2 <- as.data.frame(rnaseq2)


rnaseq2$names <- rownames(rnaseq2)


dim(rnaseq2) #328
## [1]   105 20502
#keep only tumor samples
tumorread <- rnaseq2 %>%
  filter(str_detect(names, "01A"))

dim(tumorread) #287
## [1]    91 20502
tumorread$names <- str_sub(tumorread$names, 1, str_length(tumorread$names)-16) #put names in the right format
#remove duplicated rows
tumorread <- tumorread[!duplicated(tumorread$names), ]
rownames(tumorread) <- tumorread$names
tumorread <- tumorread[,-20502]

normalread <- rnaseq2 %>%
  filter(!str_detect(names, "01A"))

dim(normalread) #287
## [1]    14 20502
normalread$names <- str_sub(normalread$names, 1, str_length(normalread$names)-16) #put names in the right format
normalread <- normalread[!duplicated(normalread$names), ]
rownames(normalread) <- normalread$names
normalread <- normalread[,-20502]

Colorectal dataset

dim(tumor)
## [1]   282 20501
dim(normal)
## [1]    46 20501
dim(tumorread)
## [1]    91 20501
dim(normalread)
## [1]    14 20501
tumorf <- rbind(tumor,tumorread)
dim(tumorf)
## [1]   373 20501
normalf <- rbind(normal, normalread)
dim(normalf)
## [1]    60 20501
# write.table(tumorf, "crcRNAseq.txt")
# write.table(normalf, "normalRNAseq.txt")

#write.table(datasurvf, "datasurv.txt")

2.2 Clinical data

COAD

clinic <- coadData@clinical

clinic.data <- clinic[,3:5]
clinic.data$days_to_death[is.na(clinic.data$days_to_death)] <- 0

for (i in 1:nrow(clinic.data)){
  
  if (clinic.data$days_to_death[i] != 0){
    
    clinic.data$days_to_last_followup[i] <- 0
    
  }
  
datasurv <- apply(as.matrix(clinic.data),2,as.numeric)
datasurv <- as.data.frame(datasurv)
datasurv$time <- datasurv$days_to_death + datasurv$days_to_last_followup

datasurv <- datasurv[,-c(2,3)]
rownames(datasurv) <- rownames(clinic.data)
datasurv <- datasurv[datasurv$time > 0, ] 
datasurv <- na.omit(datasurv)
colnames(datasurv) <- c("status","time")

datasurv$names <- rownames(datasurv)
datasurv$names <- toupper(datasurv$names)  
datasurv$names <- chartr('.', '-', datasurv$names)

rownames(datasurv) <- datasurv$names
datasurv <- datasurv[,-3]
}

READ

clinicread <- readData@clinical

clinicread.data <- clinicread[,3:5]
clinicread.data$days_to_death[is.na(clinicread.data$days_to_death)] <- 0

for (i in 1:nrow(clinicread.data)){
  
  if (clinicread.data$days_to_death[i] != 0){
    
    clinicread.data$days_to_last_followup[i] <- 0
    
  }
  
datasurvread <- apply(as.matrix(clinicread.data),2,as.numeric)
datasurvread <- as.data.frame(datasurvread)
datasurvread$time <- datasurvread$days_to_death + datasurvread$days_to_last_followup

datasurvread <- datasurvread[,-c(2,3)]
rownames(datasurvread) <- rownames(clinicread.data)
datasurvread <- datasurvread[datasurvread$time > 0, ] 
datasurvread <- na.omit(datasurvread)
colnames(datasurvread) <- c("status","time")

datasurvread$names <- rownames(datasurvread)
datasurvread$names <- toupper(datasurvread$names)  
datasurvread$names <- chartr('.', '-', datasurvread$names)

rownames(datasurvread) <- datasurvread$names
datasurvread <- datasurvread[,-3]
}

Colorectal

dim(datasurv)
## [1] 436   2
dim(datasurvread)
## [1] 159   2
datasurvf <- rbind(datasurv,datasurvread)
dim(datasurvf)
## [1] 595   2
clinicalf <- rbind(clinic,clinicread)

3 TCox

3.1 using both normal and tumor data from TCGA

  • removing variables with sd=0
xcoad_sd <- tumorf [,sapply(seq(ncol(tumorf)), function(ix) {sd(tumorf[,ix])}) != 0] # 328 20025
xnormal_sd <- normalf[,sapply(seq(ncol(normalf)), function(ix) {sd(normalf[,ix])}) != 0] #345 48710

xcoad_less <- xcoad_sd[,which(colnames(xcoad_sd) %in% colnames(xnormal_sd))]
xnormal_less <- xnormal_sd[,which(colnames(xnormal_sd) %in% colnames(xcoad_sd))]

# normalizing data
xcoad_norm <- scale(log2(xcoad_less+1)) 
xnorm_norm <- scale(log2(xnormal_less+1)) 
  • weight vector that penalizes genes with smaller distances between tumor and normal correlation matrices
## correlation matrices

library("propagate")
library("lsa")

options(fftempdir = "~/")

xtumoral_cor <- bigcor(xcoad_norm, y = NULL, fun = "cor", size = 2000, verbose=FALSE)
xtumoral_cor <- as.data.frame(as.ffdf(xtumoral_cor))

xnormal_cor <- bigcor(xnorm_norm, y = NULL, fun = "cor", size = 2000, verbose=FALSE)
xnormal_cor <- as.data.frame(as.ffdf(xnormal_cor))

# angular distance
ang_weight <- vector()

for (i in 1:dim(xcoad_norm)[2]){
ang_weight[i] <- acos(cosine(xtumoral_cor[,i],xnormal_cor[,i]))/pi
}
##normalized weights (between 0 and 1)

tumornormal_weights <- ang_weight / max(ang_weight)
hist(ang_weight)

hist(tumornormal_weights,main="normal weight")

#1-w
tumornormal_weights1 <- 1 - tumornormal_weights
hist(tumornormal_weights1,main="1 - normal weight")

#1-w^3
tumornormal_weights2 <- 1 - (tumornormal_weights^3)
hist(tumornormal_weights2,main="1 - normal weight^3")

#(1-w)^3
tumornormal_weights3 <- (1 - tumornormal_weights)^3
hist(tumornormal_weights3,main="(1 - normal weight)^3")

#exp((1-w)^3)
tumornormal_weights4 <- exp((1 - tumornormal_weights)^3)
hist(tumornormal_weights4,main="exp((1 - normal weight)^3)")

#exp(-w^3)
tumornormal_weights5 <- exp(- tumornormal_weights^3)
hist(tumornormal_weights5,main="exp(- normal weight^3)")

#1/w
tumornormal_weights6 <- 1 / tumornormal_weights
hist(tumornormal_weights6,main="1 / normal weight")

3.2 Analyse different weight vectores:

xdata.raw <- as.data.frame(xcoad_less) #255 19525

ydata.raw <- as.data.frame(datasurvf)
ydata.raw$rownames <- rownames(ydata.raw)
ydata.raw$rownames <- chartr('.', '-', ydata.raw$rownames)
rownames(ydata.raw) <- ydata.raw$rownames
ydata <- ydata.raw[rownames(ydata.raw) %in% 
                              rownames(xdata.raw),]

  
xdata.raw <- xdata.raw[rownames(xdata.raw) %in% 
                         rownames(ydata),]
xdata <- xdata.raw %>% 
  { (apply(., 2, sd) != 0) } %>% 
  { xdata.raw[, .] } %>% 
  scale

xdata <- as.matrix(xdata) #293 17910

ydata <- ydata[,1:2]
dim(xdata)
## [1]   357 19630

Test / Training sets

set.seed(params$seed)
ixs <- loose.rock::balanced.train.and.test(which(as.logical(ydata$status)), which(as.logical(!ydata$status)), train.perc = params$train)
xdata.test <- xdata[ixs$test,]
ydata.test <- ydata[ixs$test,]
#
xdata.train <- xdata[ixs$train,]
ydata.train <- ydata[ixs$train,]

xdata.train.digest <- loose.rock::digest.cache(xdata.train)

flog.info('Size of sets: (size/events)\n * Training set: % 4d/% 4d\n * Test set:     % 4d/% 4d', 
          nrow(xdata.train), 
          sum(ydata.train$status), 
          nrow(xdata.test),
          sum(ydata.test$status))
## INFO [2020-10-23 09:09:36] Size of sets: (size/events)
##  * Training set:  249/  50
##  * Test set:      108/  22
 # * Training set:  204/  50
 # * Test set:       89/  22

#Foldid
my_foldid <- sample(1:10,size=dim(ydata.train)[1],replace=TRUE)


#Remove columns with standard deviation of zero
xdata.train <- xdata.train[, ! apply(xdata.train , 2 , function(x) sd(x, na.rm = TRUE)==0 ) ]

xdata.test <- xdata.test[, ! apply(xdata.test , 2 , function(x) sd(x, na.rm = TRUE)==0 ) ]

dim(xdata.test)
## [1]   108 19572
dim(xdata.train)
## [1]   249 19618
xdata.train <- xdata.train[,colnames(xdata.train) %in% 
                         colnames(xdata.test)]
xdata.test <- xdata.test[,colnames(xdata.test) %in% 
                         colnames(xdata.train)]

dim(xdata.test)
## [1]   108 19560
dim(xdata.train)
## [1]   249 19560
xdata.train[1:3,1:3]
##                     A1BG        A1CF       A2BP1
## TCGA-3L-AA1B -0.09037123  0.50178233 -0.29147700
## TCGA-4N-A93T  6.56494274 -0.65736676  0.03499383
## TCGA-4T-AA8H -0.14165161  0.04934209 -0.36260403

3.2.1 1/w

3.2.1.1 0.3

#glmnet
cv.fit_twii3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights6)


#genes
coefs.v_twii3 <- twiner_coefs(cv.fit_twii3)

fit.selected_twii3 <- twiner_genes(cv.fit_twii3)
length(fit.selected_twii3)
## [1] 10
fit.selected_twii3
##  [1] "C10orf114" "CARKD"     "FAM159A"   "IFT74"     "LOC285780" "LOC646498"
##  [7] "OSBPL3"    "PCDHB12"   "PRCD"      "TRIM67"
#train
train_twii03 <- train (coefs.v_twii3)
train_twii03$pvalue ## these values are the ones in Table 1
## [1] 0.002401583
#test
test_twii03 <- test (coefs.v_twii3)
test_twii03$pvalue
## [1] 0.07566096
#all
all_twii03 <- all (coefs.v_twii3)

3.2.1.2 0.2

#glmnet
cv.fit_twii2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights6)


#genes
coefs.v_twii2 <- twiner_coefs(cv.fit_twii2)

fit.selected_twii2 <- twiner_genes(cv.fit_twii2)
length(fit.selected_twii2)
## [1] 11
fit.selected_twii2
##  [1] "C10orf114" "CARKD"     "FAM159A"   "IFT74"     "LOC285780" "LOC646498"
##  [7] "OSBPL3"    "PAX5"      "PCDHB12"   "PRCD"      "TRIM67"
#train
train_twii02 <- train (coefs.v_twii2)
train_twii02$pvalue
## [1] 0.0005882518
#test
test_twii02 <- test (coefs.v_twii2)
test_twii02$pvalue
## [1] 0.06653314
#all
all_twii02 <- all (coefs.v_twii2)

3.2.1.3 0.1

#glmnet
cv.fit_twii1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights6)

#genes
coefs.v_twii1 <- twiner_coefs(cv.fit_twii1)

fit.selected_twii1 <- twiner_genes(cv.fit_twii1)
length(fit.selected_twii1)
## [1] 53
fit.selected_twii1
##  [1] "ANKRD26P1"    "AOX2P"        "ASPHD1"       "C10orf114"    "C20orf106"   
##  [6] "CAPN7"        "CARKD"        "CLDN9"        "COL19A1"      "COX4I2"      
## [11] "CUZD1"        "CYP27C1"      "CYP7A1"       "DCLK3"        "DCP1A"       
## [16] "DPPA2"        "EEPD1"        "FAM138B"      "FAM159A"      "FCRL2"       
## [21] "FLJ16779"     "GRAPL"        "HPCAL1"       "IFT74"        "IGLON5"      
## [26] "LBX2"         "LOC100303728" "LOC285074"    "LOC285780"    "LOC646498"   
## [31] "LOC732275"    "MEIG1"        "NKAIN4"       "NXF2B"        "OR2T5"       
## [36] "OSBPL3"       "OSTN"         "PAX5"         "PCDHA7"       "PCDHB12"     
## [41] "PGAM2"        "PRCD"         "RAB20"        "SEPT7P2"      "SNTG1"       
## [46] "TAC3"         "TERF2IP"      "TRIM67"       "TXNL4B"       "WBSCR27"     
## [51] "ZDHHC19"      "ZNF607"       "ZNF883"
#train
train_twii01 <- train (coefs.v_twii1)
train_twii01$pvalue
## [1] 2.664447e-09
#test
test_twii01 <- test (coefs.v_twii1)
test_twii01$pvalue
## [1] 0.01944886
#all
all_twii01 <- all (coefs.v_twii1)

3.2.1.4 0.05

#glmnet
cv.fit_twii05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights6)

#genes
coefs.v_twii05 <- twiner_coefs(cv.fit_twii05)

fit.selected_twii05 <- twiner_genes(cv.fit_twii05)
length(fit.selected_twii05)
## [1] 96
#train
train_twii05 <- train (coefs.v_twii05)

#test
test_twii05 <- test (coefs.v_twii05)

#all
all_twii05 <- all (coefs.v_twii05)

3.2.1.5 plot

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_twii_values_train<-c(train_twii03$pvalue,train_twii02$pvalue,train_twii01$pvalue,train_twii05$pvalue)

pvalues_twii_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)

pvalues_twii_values_all<-c(all_twii03$pvalue,all_twii02$pvalue,all_twii01$pvalue,all_twii05$pvalue)



pvalues_twii_values<-c(train_twii03$pvalue,train_twii02$pvalue,train_twii01$pvalue,train_twii05$pvalue,test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue,all_twii03$pvalue,all_twii02$pvalue,all_twii01$pvalue,all_twii05$pvalue)
                    


pvalues_twii_method<-c(rep("pvalues TCox (1/w) train set",4),rep("pvalues TCox (1/w) test set",4),rep("pvalues TCox (1/w) all set",4))




pvalues_twii<-data.frame(alpha_points,pvalues_twii_values,pvalues_twii_method)
p_pvalues_twii<-ggplot(data = pvalues_twii, aes(x = alpha_points, y = pvalues_twii_values,color= pvalues_twii_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twii+ ylim(0, max(pvalues_twii_values))

3.2.2 exp(-w^3)

3.2.2.1 0.3

#glmnet
cv.fit_twexp23 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights5)


#genes
coefs.v_twexp23 <- twiner_coefs(cv.fit_twexp23)

fit.selected_twexp23 <- twiner_genes(cv.fit_twexp23)
length(fit.selected_twexp23)
## [1] 12
#train
train_twexp203 <- train (coefs.v_twexp23)
train_twexp203$pvalue ## these values are the ones in Table 1
## [1] 0.0006276918
#test
test_twexp203 <- test (coefs.v_twexp23)
test_twexp203$pvalue
## [1] 0.4244
#all
all_twexp203 <- all (coefs.v_twexp23)

3.2.2.2 0.2

#glmnet
cv.fit_twexp22 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights5)


#genes
coefs.v_twexp22 <- twiner_coefs(cv.fit_twexp22)

fit.selected_twexp22 <- twiner_genes(cv.fit_twexp22)
length(fit.selected_twexp22)
## [1] 18
#train
train_twexp202 <- train (coefs.v_twexp22)

#test
test_twexp202 <- test (coefs.v_twexp22)

#all
all_twexp202 <- all (coefs.v_twexp22)

3.2.2.3 0.1

#glmnet
cv.fit_twexp21 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights5)

#genes
coefs.v_twexp21 <- twiner_coefs(cv.fit_twexp21)

fit.selected_twexp21 <- twiner_genes(cv.fit_twexp21)
length(fit.selected_twexp21)
## [1] 44
#train
train_twexp201 <- train (coefs.v_twexp21)

#test
test_twexp201 <- test (coefs.v_twexp21)

#all
all_twexp201 <- all (coefs.v_twexp21)

3.2.2.4 0.05

#glmnet
cv.fit_twexp205 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights5)

#genes
coefs.v_twexp205 <- twiner_coefs(cv.fit_twexp205)

fit.selected_twexp205 <- twiner_genes(cv.fit_twexp205)
length(fit.selected_twexp205)
## [1] 72
#train
train_twexp205 <- train (coefs.v_twexp205)

#test
test_twexp205 <- test (coefs.v_twexp205)

#all
all_twexp205 <- all (coefs.v_twexp205)

3.2.2.5 plot

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_twexp2_values_train<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue)

pvalues_twexp2_values_test<-c(test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue)

pvalues_twexp2_values_all<-c(all_twexp203$pvalue,all_twexp202$pvalue,all_twexp201$pvalue,all_twexp205$pvalue)



pvalues_twexp2_values<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue,test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue,all_twexp203$pvalue,all_twexp202$pvalue,all_twexp201$pvalue,all_twexp205$pvalue)
                    


pvalues_twexp2_method<-c(rep("pvalues TCox exp(-w^3) train set",4),rep("pvalues TCox exp(-w^3)  test set",4),rep("pvalues TCox exp(-w^3)  all set",4))




pvalues_twexp2<-data.frame(alpha_points,pvalues_twexp2_values,pvalues_twexp2_method)
p_pvalues_twexp2<-ggplot(data = pvalues_twexp2, aes(x = alpha_points, y = pvalues_twexp2_values,color= pvalues_twexp2_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twexp2+ ylim(0, max(pvalues_twexp2_values))

3.2.3 1-W

3.2.3.1 0.3

#glmnet
cv.fit_tw1i3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights1)


#genes
coefs.v_tw1i3 <- twiner_coefs(cv.fit_tw1i3)

fit.selected_tw1i3 <- twiner_genes(cv.fit_tw1i3)
length(fit.selected_tw1i3)
## [1] 23
#train
train_tw1i03 <- train (coefs.v_tw1i3)

#test
test_tw1i03 <- test (coefs.v_tw1i3)

#all
all_tw1i03 <- all (coefs.v_tw1i3)

3.2.3.2 0.2

#glmnet
cv.fit_tw1i2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights1)


#genes
coefs.v_tw1i2 <- twiner_coefs(cv.fit_tw1i2)

fit.selected_tw1i2 <- twiner_genes(cv.fit_tw1i2)
length(fit.selected_tw1i2)
## [1] 28
#train
train_tw1i02 <- train (coefs.v_tw1i2)

#test
test_tw1i02 <- test (coefs.v_tw1i2)

#all
all_tw1i02 <- all (coefs.v_tw1i2)

3.2.3.3 0.1

#glmnet
cv.fit_tw1i1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights1)

#genes
coefs.v_tw1i1 <- twiner_coefs(cv.fit_tw1i1)

fit.selected_tw1i1 <- twiner_genes(cv.fit_tw1i1)
length(fit.selected_tw1i1)
## [1] 36
#train
train_tw1i01 <- train (coefs.v_tw1i1)

#test
test_tw1i01 <- test (coefs.v_tw1i1)

#all
all_tw1i01 <- all (coefs.v_tw1i1)

3.2.3.4 0.05

#glmnet
cv.fit_tw1i05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights1)

#genes
coefs.v_tw1i05 <- twiner_coefs(cv.fit_tw1i05)

fit.selected_tw1i05 <- twiner_genes(cv.fit_tw1i05)
length(fit.selected_tw1i05)
## [1] 47
#train
train_tw1i05 <- train (coefs.v_tw1i05)

#test
test_tw1i05 <- test (coefs.v_tw1i05)

#all
all_tw1i05 <- all (coefs.v_tw1i05)

3.2.3.5 plot

alpha_points<-c(0.3,0.2,0.1,0.05)

pvalues_tw1i_values_train<-c(train_tw1i03$pvalue,train_tw1i02$pvalue,train_tw1i01$pvalue,train_tw1i05$pvalue)

pvalues_tw1i_values_test<-c(test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue)

pvalues_tw1i_values_all<-c(all_tw1i03$pvalue,all_tw1i02$pvalue,all_tw1i01$pvalue,all_tw1i05$pvalue)



pvalues_tw1i_values<-c(train_tw1i03$pvalue,train_tw1i02$pvalue,train_tw1i01$pvalue,train_tw1i05$pvalue,test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue,all_tw1i03$pvalue,all_tw1i02$pvalue,all_tw1i01$pvalue,all_tw1i05$pvalue)
                    


pvalues_tw1i_method<-c(rep("pvalues TCox (1-w) train set",4),rep("pvalues TCox (1-w) test set",4),rep("pvalues TCox (1-w) all set",4))




pvalues_tw1i<-data.frame(alpha_points,pvalues_tw1i_values,pvalues_tw1i_method)
p_pvalues_tw1i<-ggplot(data = pvalues_tw1i, aes(x = alpha_points, y = pvalues_tw1i_values,color= pvalues_tw1i_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_tw1i+ ylim(0, max(pvalues_tw1i_values))

3.2.4 exp((1-w)^3)

3.2.4.1 0.3

#glmnet
cv.fit_twexp3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights4)


#genes
coefs.v_twexp3 <- twiner_coefs(cv.fit_twexp3)

fit.selected_twexp3 <- twiner_genes(cv.fit_twexp3)
length(fit.selected_twexp3)
## [1] 21
#train
train_twexp03 <- train (coefs.v_twexp3)
train_twexp03$pvalue ## these values are the ones in Table 1
## [1] 7.367639e-07
#test
test_twexp03 <- test (coefs.v_twexp3)
test_twexp03$pvalue
## [1] 0.1141262
#all
all_twexp03 <- all (coefs.v_twexp3)

3.2.4.2 0.2

#glmnet
cv.fit_twexp2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights4)


#genes
coefs.v_twexp2 <- twiner_coefs(cv.fit_twexp2)

fit.selected_twexp2 <- twiner_genes(cv.fit_twexp2)
length(fit.selected_twexp2)
## [1] 24
#train
train_twexp02 <- train (coefs.v_twexp2)

#test
test_twexp02 <- test (coefs.v_twexp2)

#all
all_twexp02 <- all (coefs.v_twexp2)

3.2.4.3 0.1

#glmnet
cv.fit_twexp1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights4)

#genes
coefs.v_twexp1 <- twiner_coefs(cv.fit_twexp1)

fit.selected_twexp1 <- twiner_genes(cv.fit_twexp1)
length(fit.selected_twexp1)
## [1] 98
#train
train_twexp01 <- train (coefs.v_twexp1)

#test
test_twexp01 <- test (coefs.v_twexp1)

#all
all_twexp01 <- all (coefs.v_twexp1)

3.2.4.4 0.05

#glmnet
cv.fit_twexp05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights4)

#genes
coefs.v_twexp05 <- twiner_coefs(cv.fit_twexp05)

fit.selected_twexp05 <- twiner_genes(cv.fit_twexp05)
length(fit.selected_twexp05)
## [1] 159
#train
train_twexp05 <- train (coefs.v_twexp05)

#test
test_twexp05 <- test (coefs.v_twexp05)

#all
all_twexp05 <- all (coefs.v_twexp05)

3.2.4.5 plot

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_twexp_values_train<-c(train_twexp03$pvalue,train_twexp02$pvalue,train_twexp01$pvalue,train_twexp05$pvalue)

pvalues_twexp_values_test<-c(test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue)

pvalues_twexp_values_all<-c(all_twexp03$pvalue,all_twexp02$pvalue,all_twexp01$pvalue,all_twexp05$pvalue)



pvalues_twexp_values<-c(train_twexp03$pvalue,train_twexp02$pvalue,train_twexp01$pvalue,train_twexp05$pvalue,test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue,all_twexp03$pvalue,all_twexp02$pvalue,all_twexp01$pvalue,all_twexp05$pvalue)
                    


pvalues_twexp_method<-c(rep("pvalues TCox exp((1-w)^3) train set",4),rep("pvalues TCox exp((1-w)^3) test set",4),rep("pvalues TCox exp((1-w)^3) all set",4))




pvalues_twexp<-data.frame(alpha_points,pvalues_twexp_values,pvalues_twexp_method)
p_pvalues_twexp<-ggplot(data = pvalues_twexp, aes(x = alpha_points, y = pvalues_twexp_values,color= pvalues_twexp_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twexp+ ylim(0, max(pvalues_twexp_values))

3.2.5 1-normal weight^3

3.2.5.1 0.3

#glmnet
cv.fit_twele3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights2)


#genes
coefs.v_twele3 <- twiner_coefs(cv.fit_twele3)

fit.selected_twele3 <- twiner_genes(cv.fit_twele3)
length(fit.selected_twele3)
## [1] 21
#train
train_twele03 <- train (coefs.v_twele3)
train_twele03$pvalue ## these values are the ones in Table 1
## [1] 3.783471e-07
#test
test_twele03 <- test (coefs.v_twele3)
test_twele03$pvalue
## [1] 0.06125732
#all
all_twele03 <- all (coefs.v_twele3)

3.2.5.2 0.2

#glmnet
cv.fit_twele2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights2)


#genes
coefs.v_twele2 <- twiner_coefs(cv.fit_twele2)

fit.selected_twele2 <- twiner_genes(cv.fit_twele2)
length(fit.selected_twele2)
## [1] 21
#train
train_twele02 <- train (coefs.v_twele2)

#test
test_twele02 <- test (coefs.v_twele2)

#all
all_twele02 <- all (coefs.v_twele2)

3.2.5.3 0.1

#glmnet
cv.fit_twele1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights2)

#genes
coefs.v_twele1 <- twiner_coefs(cv.fit_twele1)

fit.selected_twele1 <- twiner_genes(cv.fit_twele1)
length(fit.selected_twele1)
## [1] 33
#train
train_twele01 <- train (coefs.v_twele1)

#test
test_twele01 <- test (coefs.v_twele1)

#all
all_twele01 <- all (coefs.v_twele1)

3.2.5.4 0.05

#glmnet
cv.fit_twele05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights2)

#genes
coefs.v_twele05 <- twiner_coefs(cv.fit_twele05)

fit.selected_twele05 <- twiner_genes(cv.fit_twele05)
length(fit.selected_twele05)
## [1] 46
#train
train_twele05 <- train (coefs.v_twele05)

#test
test_twele05 <- test (coefs.v_twele05)

#all
all_twele05 <- all (coefs.v_twele05)

3.2.5.5 plot

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_twele_values_train<-c(train_twele03$pvalue,train_twele02$pvalue,train_twele01$pvalue,train_twele05$pvalue)

pvalues_twele_values_test<-c(test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue)

pvalues_twele_values_all<-c(all_twele03$pvalue,all_twele02$pvalue,all_twele01$pvalue,all_twele05$pvalue)



pvalues_twele_values<-c(train_twele03$pvalue,train_twele02$pvalue,train_twele01$pvalue,train_twele05$pvalue,test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue,all_twele03$pvalue,all_twele02$pvalue,all_twele01$pvalue,all_twele05$pvalue)
                    


pvalues_twele_method<-c(rep("pvalues TCox (1-w^3) train set",4),rep("pvalues TCox (1-w^3) test set",4),rep("pvalues TCox (1-w^3) all set",4))




pvalues_twele<-data.frame(alpha_points,pvalues_twele_values,pvalues_twele_method)
p_pvalues_twele<-ggplot(data = pvalues_twele, aes(x = alpha_points, y = pvalues_twele_values,color= pvalues_twele_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_twele+ ylim(0, max(pvalues_twele_values))

3.2.6 (1-normal weight)^3

3.2.6.1 0.3

#glmnet
cv.fit_tweleele3 <- twiner_glmnet(xdata.train,ydata.train,0.3, tumornormal_weights3)


#genes
coefs.v_tweleele3 <- twiner_coefs(cv.fit_tweleele3)

fit.selected_tweleele3 <- twiner_genes(cv.fit_tweleele3)
length(fit.selected_tweleele3)
## [1] 21
#train
train_tweleele03 <- train (coefs.v_tweleele3)
train_tweleele03$pvalue ## these values are the ones in Table 1
## [1] 2.406974e-06
#test
test_tweleele03 <- test (coefs.v_tweleele3)
test_tweleele03$pvalue
## [1] 0.534717
#all
all_tweleele03 <- all (coefs.v_tweleele3)

3.2.6.2 0.2

#glmnet
cv.fit_tweleele2 <- twiner_glmnet(xdata.train,ydata.train,0.2, tumornormal_weights3)


#genes
coefs.v_tweleele2 <- twiner_coefs(cv.fit_tweleele2)

fit.selected_tweleele2 <- twiner_genes(cv.fit_tweleele2)
length(fit.selected_tweleele2)
## [1] 21
#train
train_tweleele02 <- train (coefs.v_tweleele2)

#test
test_tweleele02 <- test (coefs.v_tweleele2)

#all
all_tweleele02 <- all (coefs.v_tweleele2)

3.2.6.3 0.1

#glmnet
cv.fit_tweleele1 <- twiner_glmnet(xdata.train,ydata.train,0.1, tumornormal_weights3)

#genes
coefs.v_tweleele1 <- twiner_coefs(cv.fit_tweleele1)

fit.selected_tweleele1 <- twiner_genes(cv.fit_tweleele1)
length(fit.selected_tweleele1)
## [1] 25
#train
train_tweleele01 <- train (coefs.v_tweleele1)

#test
test_tweleele01 <- test (coefs.v_tweleele1)

#all
all_tweleele01 <- all (coefs.v_tweleele1)

3.2.6.4 0.05

#glmnet
cv.fit_tweleele05 <- twiner_glmnet(xdata.train,ydata.train,0.05, tumornormal_weights3)

#genes
coefs.v_tweleele05 <- twiner_coefs(cv.fit_tweleele05)

fit.selected_tweleele05 <- twiner_genes(cv.fit_tweleele05)
length(fit.selected_tweleele05)
## [1] 30
#train
train_tweleele05 <- train (coefs.v_tweleele05)

#test
test_tweleele05 <- test (coefs.v_tweleele05)

#all
all_tweleele05 <- all (coefs.v_tweleele05)

3.2.6.5 plot

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_tweleele_values_train<-c(train_tweleele03$pvalue,train_tweleele02$pvalue,train_tweleele01$pvalue,train_tweleele05$pvalue)

pvalues_tweleele_values_test<-c(test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue)

pvalues_tweleele_values_all<-c(all_tweleele03$pvalue,all_tweleele02$pvalue,all_tweleele01$pvalue,all_tweleele05$pvalue)



pvalues_tweleele_values<-c(train_tweleele03$pvalue,train_tweleele02$pvalue,train_tweleele01$pvalue,train_tweleele05$pvalue,test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue,all_tweleele03$pvalue,all_tweleele02$pvalue,all_tweleele01$pvalue,all_tweleele05$pvalue)
                    


pvalues_tweleele_method<-c(rep("pvalues TCox (1-w)^3 train set",4),rep("pvalues TCox (1-w)^3 test set",4),rep("pvalues TCox (1-w)^3 all set",4))




pvalues_tweleele<-data.frame(alpha_points,pvalues_tweleele_values,pvalues_tweleele_method)
p_pvalues_tweleele<-ggplot(data = pvalues_tweleele, aes(x = alpha_points, y = pvalues_tweleele_values,color= pvalues_tweleele_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_tweleele+ ylim(0, max(pvalues_tweleele_values))

3.2.7 final plot with all weights - Figure 2

alpha_points<-c(0.3,0.2,0.1,0.05)

#1-w
pvalues_tw1i_values_test<-c(test_tw1i03$pvalue,test_tw1i02$pvalue,test_tw1i01$pvalue,test_tw1i05$pvalue)
#1/w
pvalues_twii_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)
#exp((1-w)^3)
pvalues_twexp_values_test<-c(test_twexp03$pvalue,test_twexp02$pvalue,test_twexp01$pvalue,test_twexp05$pvalue)
#exp(-w^3)
pvalues_twexp2_values_test<-c(test_twexp203$pvalue,test_twexp202$pvalue,test_twexp201$pvalue,test_twexp205$pvalue)
#1-w^3
pvalues_twele_values_test<-c(test_twele03$pvalue,test_twele02$pvalue,test_twele01$pvalue,test_twele05$pvalue)
#(1-w)^3
pvalues_tweleele_values_test<-c(test_tweleele03$pvalue,test_tweleele02$pvalue,test_tweleele01$pvalue,test_tweleele05$pvalue)



pvalues_test<-c(pvalues_tw1i_values_test,pvalues_twii_values_test,pvalues_twexp_values_test,pvalues_twexp2_values_test,pvalues_twele_values_test,pvalues_tweleele_values_test)

pvalues_method_test<-c( rep("1-w",4), rep("1/w",4), rep("exp((1-w)^3)",4), rep("exp(-w^3)",4),rep("1-w^3 ",4),rep("(1-w)^3",4))

pvalues_test_data<-data.frame(alpha_points,pvalues_test,pvalues_method_test)
p_pvalues_test<-ggplot(data = pvalues_test_data, aes(x = alpha_points, y = pvalues_test,color= pvalues_method_test)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_test+ ylim(0, max(pvalues_test))

3.3 Demonstrative train and test survival curves - Figure 3

Train and Test curves obtained using 1/w weight vector and alpha = 0.1

#train
train_twii01
## $pvalue
## [1] 2.664447e-09
## 
## $plot

## 
## $km
## Call: survfit(formula = survival::Surv(time, status) ~ group, data = prognostic.index.df)
## 
##             n events median 0.95LCL 0.95UCL
## Low risk  125      8     NA      NA      NA
## High risk 124     42   1422    1126    1741
#test
test_twii01
## $pvalue
## [1] 0.01944886
## 
## $plot

## 
## $km
## Call: survfit(formula = survival::Surv(time, status) ~ group, data = prognostic.index.df)
## 
##            n events median 0.95LCL 0.95UCL
## Low risk  54      7     NA    2003      NA
## High risk 54     15   1910     992      NA

4 Compare all models

4.1 EN

4.1.1 alpha=0.3

cv.fit_elastic_net3<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train), 
                               family = 'cox',foldid = my_foldid, 
                               alpha = 0.3,
                               nlambda = 100,
                               #lambda.min.ratio = 10^(-3),
                               nfolds= params$nfolds)

coefs.v_elastic3 <- coef(cv.fit_elastic_net3, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_elastic3 <- which(cv.fit_elastic_net3$glmnet.fit$beta[,which(cv.fit_elastic_net3$cvm == min(cv.fit_elastic_net3$cvm))] != 0)

length(fit.selected_elastic3)
## [1] 18
names(fit.selected_elastic3)
##  [1] "AFF3"      "CLDN9"     "ELFN1"     "FAM129C"   "FAM138B"   "FAM159A"  
##  [7] "GRAPL"     "KCNMB3"    "LBX2"      "LOC220930" "LOC646498" "LOC732275"
## [13] "MBL1P"     "NKX1-2"    "PBX4"      "PGAM2"     "PI4K2B"    "PRCD"
train_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3), 
                               xdata.train[, names(coefs.v_elastic3)], 
                               ydata.train, 
                               plot.title = 'Train Set Elastic Net, alpha=0.3', legend.outside = FALSE)
train_en03$pvalue
## [1] 8.387034e-07
test_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3), 
                              xdata.test[, names(coefs.v_elastic3)], 
                              ydata.test, 
                              plot.title = 'Test Set Elastic Net, alpha=0.3', legend.outside = FALSE)
test_en03$pvalue
## [1] 0.00882275
all_en03<-separate2GroupsCox(as.vector(coefs.v_elastic3), 
                             xdata[, names(coefs.v_elastic3)], 
                             ydata %>% mutate(time = time / 365 * 12), 
                             plot.title = 'All data Elastic Net, alpha=0.3', legend.outside = FALSE,
                             break.x.by = 12)

4.1.2 alpha=0.2

cv.fit_elastic_net2<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, 
                          alpha = 0.2,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds)

coefs.v_elastic2 <- coef(cv.fit_elastic_net2, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_elastic2 <- which(cv.fit_elastic_net2$glmnet.fit$beta[,which(cv.fit_elastic_net2$cvm == min(cv.fit_elastic_net2$cvm))] != 0)

length(fit.selected_elastic2)
## [1] 47
names(fit.selected_elastic2)
##  [1] "AASS"         "AFF3"         "BACH2"        "C1orf185"     "C7orf13"     
##  [6] "CLDN9"        "CLEC17A"      "CLEC18C"      "CYP7A1"       "DCAF10"      
## [11] "EEPD1"        "ELFN1"        "ENO3"         "FAM129C"      "FAM138B"     
## [16] "FAM159A"      "FATE1"        "FCRL1"        "GRAPL"        "ISL2"        
## [21] "KCNMB3"       "LBX2"         "LOC100130238" "LOC220930"    "LOC646498"   
## [26] "LOC732275"    "MBL1P"        "MEIG1"        "MYF6"         "MYH8"        
## [31] "NGLY1"        "NKAIN4"       "NKX1-2"       "PBX4"         "PCDHB12"     
## [36] "PGAM2"        "PI4K2B"       "PRCD"         "PTPN6"        "RFPL4B"      
## [41] "RPP14"        "SIX2"         "TCL1A"        "TMEM86B"      "UNC45B"      
## [46] "ZDHHC19"      "ZNF883"
train_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2), 
                   xdata.train[, names(coefs.v_elastic2)], 
                   ydata.train, 
                   plot.title = 'Train Set Elastic Net, alpha=0.2', legend.outside = FALSE)
train_en02$pvalue
## [1] 2.474278e-08
test_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2), 
                   xdata.test[, names(coefs.v_elastic2)], 
                   ydata.test, 
                   plot.title = 'Test Set Elastic Net, alpha=0.2', legend.outside = FALSE)
test_en02$pvalue
## [1] 0.07168201
all_en02<-separate2GroupsCox(as.vector(coefs.v_elastic2), 
                   xdata[, names(coefs.v_elastic2)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data Elastic Net, alpha=0.2', legend.outside = FALSE,
                   break.x.by = 12)

4.1.3 alpha=0.1

cv.fit_elastic_net1<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, 
                          alpha = 0.1,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds)


coefs.v_elastic1 <- coef(cv.fit_elastic_net1, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_elastic1 <- which(cv.fit_elastic_net1$glmnet.fit$beta[,which(cv.fit_elastic_net1$cvm == min(cv.fit_elastic_net1$cvm))] != 0)

length(fit.selected_elastic1)
## [1] 88
names(fit.selected_elastic1)
##  [1] "AASS"         "AFF3"         "BACH2"        "BLK"          "C10orf114"   
##  [6] "C14orf72"     "C1orf185"     "C2CD4C"       "C7orf13"      "C9orf82"     
## [11] "CABP5"        "CAPRIN2"      "CCDC126"      "CD19"         "CDH18"       
## [16] "CLDN9"        "CLEC17A"      "CLEC18C"      "CLVS2"        "CST2"        
## [21] "CUZD1"        "CYP7A1"       "DCAF10"       "DNAI2"        "EEPD1"       
## [26] "EGFL7"        "ELFN1"        "EML6"         "ENO3"         "FAM129C"     
## [31] "FAM138B"      "FAM159A"      "FAM81B"       "FATE1"        "FCER2"       
## [36] "FCRL1"        "GABRD"        "GJA3"         "GRAPL"        "GUCA1B"      
## [41] "HOTAIR"       "HPCAL1"       "ISL2"         "KCNMB3"       "LBX2"        
## [46] "LOC100130238" "LOC220930"    "LOC283663"    "LOC646498"    "LOC732275"   
## [51] "LRP2BP"       "MBL1P"        "MEF2B"        "MEIG1"        "MYF6"        
## [56] "MYH8"         "NCF1C"        "NEK4"         "NELF"         "NGLY1"       
## [61] "NKAIN4"       "NKX1-2"       "PAX5"         "PBX4"         "PCDHB12"     
## [66] "PGAM2"        "PI4K2B"       "PRCD"         "PRR4"         "PTPN6"       
## [71] "PYROXD2"      "RASA4P"       "RFPL4B"       "RPP14"        "SHOX2"       
## [76] "SIX2"         "SLC25A18"     "TAS2R20"      "TCF15"        "TCL1A"       
## [81] "TERT"         "TMEM86B"      "TMEM90B"      "TUBA8"        "UNC45B"      
## [86] "ZDHHC19"      "ZNF607"       "ZNF883"
train_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1), 
                   xdata.train[, names(coefs.v_elastic1)], 
                   ydata.train, 
                   plot.title = 'Train Set Elastic Net, alpha=0.1', legend.outside = FALSE)
train_en01$pvalue
## [1] 5.28787e-09
test_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1), 
                   xdata.test[, names(coefs.v_elastic1)], 
                   ydata.test, 
                   plot.title = 'Test Set Elastic Net, alpha=0.1', legend.outside = FALSE)
test_en01$pvalue
## [1] 0.04921873
all_en01<-separate2GroupsCox(as.vector(coefs.v_elastic1), 
                   xdata[, names(coefs.v_elastic1)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data Elastic Net, alpha=0.1', legend.outside = FALSE,
                   break.x.by = 12)

4.1.4 alpha=0.05

cv.fit_elastic_net05<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, 
                          alpha = 0.05,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds)

coefs.v_elastic05 <- coef(cv.fit_elastic_net05, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_elastic05 <- which(cv.fit_elastic_net05$glmnet.fit$beta[,which(cv.fit_elastic_net05$cvm == min(cv.fit_elastic_net05$cvm))] != 0)

length(fit.selected_elastic05)
## [1] 133
train_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05), 
                   xdata.train[, names(coefs.v_elastic05)], 
                   ydata.train, 
                   plot.title = 'Train Set Elastic Net, alpha=0.05', legend.outside = FALSE)

test_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05), 
                   xdata.test[, names(coefs.v_elastic05)], 
                   ydata.test, 
                   plot.title = 'Test Set Elastic Net, alpha=0.05', legend.outside = FALSE)

all_en05<-separate2GroupsCox(as.vector(coefs.v_elastic05), 
                   xdata[, names(coefs.v_elastic05)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data Elastic Net, alpha=0.05', legend.outside = FALSE,
                   break.x.by = 12)

#####1 alpha=0.04

cv.fit_elastic_net04<-cv.glmnet(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, 
                          alpha = 0.04,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds)

coefs.v_elastic04 <- coef(cv.fit_elastic_net04, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_elastic04 <- which(cv.fit_elastic_net04$glmnet.fit$beta[,which(cv.fit_elastic_net04$cvm == min(cv.fit_elastic_net04$cvm))] != 0)

length(fit.selected_elastic04)
## [1] 141
train_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04), 
                   xdata.train[, names(coefs.v_elastic04)], 
                   ydata.train, 
                   plot.title = 'Train Set Elastic Net, alpha=0.04', legend.outside = FALSE)

test_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04), 
                   xdata.test[, names(coefs.v_elastic04)], 
                   ydata.test, 
                   plot.title = 'Test Set Elastic Net, alpha=0.04', legend.outside = FALSE)

all_en04<-separate2GroupsCox(as.vector(coefs.v_elastic04), 
                   xdata[, names(coefs.v_elastic04)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data Elastic Net, alpha=0.04', legend.outside = FALSE,
                   break.x.by = 12)

4.1.5 Pvalues KM

alpha_points<-c(0.3,0.2,0.1,0.05)

pvalues_en_train<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue)

pvalues_en_test<-c(test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue)

pvalues_en_all<-c(all_en03$pvalue,all_en02$pvalue,all_en01$pvalue,all_en05$pvalue)
pvalues_en_values<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue,
test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue,
all_en03$pvalue,all_en02$pvalue,all_en01$pvalue,all_en05$pvalue)

pvalues_en_method<-c(rep("pvalues en train set",4),rep("pvalues en test set",4),rep("pvalues en all set",4))

4.2 Hub

4.2.1 alpha=0.3

cv.fit_hub3<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
                       family = 'cox',foldid = my_foldid, network = "correlation",
                       alpha = 0.3,
                       nlambda = 100,
                       #lambda.min.ratio = 10^(-3),
                       nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.005,                                                                   min.degree = 0.6))
coefs.v_hub3 <- coef(cv.fit_hub3, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_hub3 <- which(cv.fit_hub3$glmnet.fit$beta[,which(cv.fit_hub3$cvm == min(cv.fit_hub3$cvm))] != 0)

length(fit.selected_hub3)
## [1] 26
names(fit.selected_hub3)
##  [1] "AASS"      "AFF3"      "BACH2"     "C7orf13"   "CLDN9"     "DCAF10"   
##  [7] "EEPD1"     "ELFN1"     "FAM129C"   "FAM138B"   "FAM159A"   "FCRL1"    
## [13] "GRAPL"     "KCNMB3"    "LBX2"      "LOC220930" "LOC646498" "MBL1P"    
## [19] "PBX4"      "PCDHB12"   "PGAM2"     "PI4K2B"    "PRCD"      "TMEM86B"  
## [25] "UNC45B"    "ZDHHC19"
train_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
                                xdata.train[, names(coefs.v_hub3)],
                                ydata.train,
                                plot.title = 'Train Set hub Net, alpha=0.3', legend.outside = FALSE)
train_hub03$pvalue
## [1] 1.788036e-08
test_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
                               xdata.test[, names(coefs.v_hub3)],
                               ydata.test,
                               plot.title = 'Test Set hub Net, alpha=0.3', legend.outside = FALSE)
test_hub03$pvalue
## [1] 0.01382812
all_hub03<-separate2GroupsCox(as.vector(coefs.v_hub3),
                              xdata[, names(coefs.v_hub3)],
                              ydata %>% mutate(time = time / 365 * 12),
                              plot.title = 'All data hub Net, alpha=0.3', legend.outside = FALSE,
                              break.x.by = 12)

4.2.2 alpha=0.2

cv.fit_hub2<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train),
                          family = 'cox',foldid = my_foldid, network = "correlation",
                          alpha = 0.2,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.005,                                                                   min.degree = 0.6))
coefs.v_hub2 <- coef(cv.fit_hub2, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_hub2 <- which(cv.fit_hub2$glmnet.fit$beta[,which(cv.fit_hub2$cvm == min(cv.fit_hub2$cvm))] != 0)

length(fit.selected_hub2)
## [1] 47
names(fit.selected_hub2)
##  [1] "AASS"      "AFF3"      "BACH2"     "BLK"       "C14orf72"  "C7orf13"  
##  [7] "CLDN9"     "CLEC17A"   "DCAF10"    "EEPD1"     "EGFL7"     "ELFN1"    
## [13] "ENO3"      "FAM129C"   "FAM138B"   "FAM159A"   "FCRL1"     "GRAPL"    
## [19] "KCNMB3"    "LBX2"      "LOC220930" "LOC646498" "LOC732275" "MBL1P"    
## [25] "MEIG1"     "MYF6"      "MYH8"      "NCF1C"     "NGLY1"     "NKAIN4"   
## [31] "NKX1-2"    "PAX5"      "PBX4"      "PCDHB12"   "PGAM2"     "PI4K2B"   
## [37] "PRCD"      "PTPN6"     "RPP14"     "SHOX2"     "SIX2"      "TCL1A"    
## [43] "TMEM86B"   "TMEM90B"   "UNC45B"    "ZDHHC19"   "ZNF883"
train_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
                   xdata.train[, names(coefs.v_hub2)],
                   ydata.train,
                   plot.title = 'Train Set hub Net, alpha=0.2', legend.outside = FALSE)
train_hub02$pvalue
## [1] 1.18224e-08
test_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
                   xdata.test[, names(coefs.v_hub2)],
                   ydata.test,
                   plot.title = 'Test Set hub Net, alpha=0.2', legend.outside = FALSE)
test_hub02$pvalue
## [1] 0.01288524
all_hub02<-separate2GroupsCox(as.vector(coefs.v_hub2),
                   xdata[, names(coefs.v_hub2)],
                   ydata %>% mutate(time = time / 365 * 12),
                   plot.title = 'All data hub Net, alpha=0.2', legend.outside = FALSE,
                   break.x.by = 12)

4.2.3 alpha=0.1

cv.fit_hub1<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, network = "correlation", 
                          alpha = 0.1,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.005,                                                                   min.degree = 0.6))


coefs.v_hub1 <- coef(cv.fit_hub1, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_hub1 <- which(cv.fit_hub1$glmnet.fit$beta[,which(cv.fit_hub1$cvm == min(cv.fit_hub1$cvm))] != 0)

length(fit.selected_hub1)
## [1] 90
names(fit.selected_hub1)
##  [1] "AASS"         "AFF3"         "BACH2"        "BEND4"        "BLK"         
##  [6] "C10orf114"    "C14orf72"     "C1orf185"     "C2CD4C"       "C7orf13"     
## [11] "C9orf82"      "CAPRIN2"      "CCDC126"      "CD19"         "CDH18"       
## [16] "CLDN9"        "CLEC17A"      "CLEC18C"      "CLVS2"        "CST2"        
## [21] "CUZD1"        "CYGB"         "CYP27C1"      "CYP7A1"       "DCAF10"      
## [26] "EEPD1"        "EGFL7"        "ELFN1"        "EML6"         "ENO3"        
## [31] "FAM129C"      "FAM138B"      "FAM159A"      "FAM24B"       "FATE1"       
## [36] "FCRL1"        "GABRD"        "GRAPL"        "HPCAL1"       "IFT74"       
## [41] "ISL2"         "KCNMB3"       "LBX2"         "LIPT2"        "LOC100130238"
## [46] "LOC220930"    "LOC285074"    "LOC646498"    "LOC732275"    "LRP2BP"      
## [51] "MBL1P"        "MEF2B"        "MEIG1"        "MYF6"         "MYH8"        
## [56] "NCF1C"        "NEK4"         "NGLY1"        "NKAIN4"       "NKX1-2"      
## [61] "PAX5"         "PBX4"         "PCDHB12"      "PGAM2"        "PI4K2B"      
## [66] "PRCD"         "PTPN6"        "PYROXD2"      "RASA4P"       "RASGRP2"     
## [71] "RFPL4B"       "RFT1"         "RPP14"        "SHOX2"        "SIX2"        
## [76] "SLC25A18"     "SLFN11"       "TAS2R20"      "TCF15"        "TCL1A"       
## [81] "TERT"         "TMEM86B"      "TMEM90B"      "TUBA8"        "UNC13B"      
## [86] "UNC45B"       "WBSCR27"      "ZDHHC19"      "ZNF607"       "ZNF883"
train_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1), 
                   xdata.train[, names(coefs.v_hub1)], 
                   ydata.train, 
                   plot.title = 'Train Set hub Net, alpha=0.1', legend.outside = FALSE)
train_hub01$pvalue
## [1] 2.741041e-09
test_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1), 
                   xdata.test[, names(coefs.v_hub1)], 
                   ydata.test, 
                   plot.title = 'Test Set hub Net, alpha=0.1', legend.outside = FALSE)
test_hub01$pvalue
## [1] 0.04180592
all_hub01<-separate2GroupsCox(as.vector(coefs.v_hub1), 
                   xdata[, names(coefs.v_hub1)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data hub Net, alpha=0.1', legend.outside = FALSE,
                   break.x.by = 12)

4.2.4 alpha=0.05

cv.fit_hub05<-cv.glmHub(as.matrix(xdata.train),as.matrix(ydata.train), 
                          family = 'cox',foldid = my_foldid, network = "correlation", 
                          alpha = 0.05,
                        nlambda = 100,
               #lambda.min.ratio = 10^(-2),
                          nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.005,                                                                   min.degree = 0.6))

coefs.v_hub05 <- coef(cv.fit_hub05, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_hub05 <- which(cv.fit_hub05$glmnet.fit$beta[,which(cv.fit_hub05$cvm == min(cv.fit_hub05$cvm))] != 0)

length(fit.selected_hub05)
## [1] 163
train_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05), 
                   xdata.train[, names(coefs.v_hub05)], 
                   ydata.train, 
                   plot.title = 'Train Set hub Net, alpha=0.05', legend.outside = FALSE)

test_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05), 
                   xdata.test[, names(coefs.v_hub05)], 
                   ydata.test, 
                   plot.title = 'Test Set hub Net, alpha=0.05', legend.outside = FALSE)

all_hub05<-separate2GroupsCox(as.vector(coefs.v_hub05), 
                   xdata[, names(coefs.v_hub05)], 
                   ydata %>% mutate(time = time / 365 * 12), 
                   plot.title = 'All data hub Net, alpha=0.05', legend.outside = FALSE,
                   break.x.by = 12)

4.2.5 Pvalues KM

alpha_points<-c(0.3, 0.2,0.1,0.05)

pvalues_hub_train<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue)

pvalues_hub_test<-c(test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue)

pvalues_hub_all<-c(all_hub03$pvalue,all_hub02$pvalue,all_hub01$pvalue,all_hub05$pvalue)
pvalues_hub_values<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue,
                              test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue,
                              all_hub03$pvalue,all_hub02$pvalue,all_hub01$pvalue,all_hub05$pvalue)

pvalues_hub_method<-c(rep("pvalues hub train set",4),rep("pvalues hub test set",4),rep("pvalues hub all set",4))
 
pvalues_hub<-data.frame(alpha_points,pvalues_hub_values,pvalues_hub_method)
p_pvalues_hub<-ggplot(data = pvalues_hub, aes(x = alpha_points, y = pvalues_hub_values,color= pvalues_hub_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_hub+ ylim(0, max(pvalues_hub_values))

4.3 Orphan

4.3.1 alpha=0.3

cv.fit_orphan3<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train), 
                             family = 'cox',foldid = my_foldid, network = "correlation", 
                             alpha = 0.3,
                             nlambda = 100,
                             #lambda.min.ratio = 10^(-3),
                             nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.001,                                                                   min.degree = 0.6))

coefs.v_orphan3 <- coef(cv.fit_orphan3, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_orphan3 <- which(cv.fit_orphan3$glmnet.fit$beta[,which(cv.fit_orphan3$cvm == min(cv.fit_orphan3$cvm))] != 0)

length(fit.selected_orphan3)
## [1] 8
names(fit.selected_orphan3)
## [1] "AFF3"      "CLDN9"     "ELFN1"     "FAM138B"   "GRAPL"     "KCNMB3"   
## [7] "LBX2"      "LOC646498"
train_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3), 
                                   xdata.train[, names(coefs.v_orphan3)], 
                                   ydata.train, 
                                   plot.title = 'Train Set orphan Net, alpha=0.3', legend.outside = FALSE)
train_orphan03$pvalue
## [1] 2.489645e-05
test_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3), 
                                  xdata.test[, names(coefs.v_orphan3)], 
                                  ydata.test, 
                                  plot.title = 'Test Set orphan Net, alpha=0.3', legend.outside = FALSE)
test_orphan03$pvalue
## [1] 0.1518845
all_orphan03<-separate2GroupsCox(as.vector(coefs.v_orphan3), 
                                 xdata[, names(coefs.v_orphan3)], 
                                 ydata %>% mutate(time = time / 365 * 12), 
                                 plot.title = 'All data orphan Net, alpha=0.3', legend.outside = FALSE,
                                 break.x.by = 12)

4.3.2 alpha=0.2

cv.fit_orphan2<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train), 
                               family = 'cox',foldid = my_foldid, network = "correlation", 
                               alpha = 0.2,
                               nlambda = 100,
                               #lambda.min.ratio = 10^(-2),
                               nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.001,                                                                   min.degree = 0.6))

coefs.v_orphan2 <- coef(cv.fit_orphan2, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_orphan2 <- which(cv.fit_orphan2$glmnet.fit$beta[,which(cv.fit_orphan2$cvm == min(cv.fit_orphan2$cvm))] != 0)

length(fit.selected_orphan2)
## [1] 44
names(fit.selected_orphan2)
##  [1] "AFF3"         "BACH2"        "C7orf13"      "CLDN9"        "CLEC17A"     
##  [6] "CLEC18C"      "CYP7A1"       "DCAF10"       "EEPD1"        "ELFN1"       
## [11] "ENO3"         "FAM129C"      "FAM138B"      "FAM159A"      "FATE1"       
## [16] "FCRL1"        "GRAPL"        "ISL2"         "KCNMB3"       "LBX2"        
## [21] "LOC100130238" "LOC220930"    "LOC646498"    "LOC732275"    "MBL1P"       
## [26] "MEIG1"        "MYF6"         "MYH8"         "NKAIN4"       "NKX1-2"      
## [31] "PBX4"         "PCDHB12"      "PGAM2"        "PI4K2B"       "PRCD"        
## [36] "PTPN6"        "RFPL4B"       "RPP14"        "SIX2"         "TCL1A"       
## [41] "TMEM86B"      "UNC45B"       "ZDHHC19"      "ZNF883"
train_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2), 
                               xdata.train[, names(coefs.v_orphan2)], 
                               ydata.train, 
                               plot.title = 'Train Set orphan Net, alpha=0.2', legend.outside = FALSE)
train_orphan02$pvalue
## [1] 1.204941e-07
test_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2), 
                              xdata.test[, names(coefs.v_orphan2)], 
                              ydata.test, 
                              plot.title = 'Test Set orphan Net, alpha=0.2', legend.outside = FALSE)
test_orphan02$pvalue
## [1] 0.03266713
all_orphan02<-separate2GroupsCox(as.vector(coefs.v_orphan2), 
                             xdata[, names(coefs.v_orphan2)], 
                             ydata %>% mutate(time = time / 365 * 12), 
                             plot.title = 'All data orphan Net, alpha=0.2', legend.outside = FALSE,
                             break.x.by = 12)

4.3.3 alpha=0.1

cv.fit_orphan1 <-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train), 
                               family = 'cox',foldid = my_foldid, network = "correlation", 
                               alpha = 0.1,
                               nlambda = 100,
                               #lambda.min.ratio = 10^(-2),
                               nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.001,                                                                   min.degree = 0.6))

coefs.v_orphan1 <- coef(cv.fit_orphan1, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_orphan1 <- which(cv.fit_orphan1$glmnet.fit$beta[,which(cv.fit_orphan1$cvm == min(cv.fit_orphan1$cvm))] != 0)

length(fit.selected_orphan1)
## [1] 67
names(fit.selected_orphan1)
##  [1] "AASS"         "AFF3"         "BACH2"        "BLK"          "C1orf185"    
##  [6] "C7orf13"      "CABP5"        "CAPRIN2"      "CCDC126"      "CD19"        
## [11] "CDH18"        "CLDN9"        "CLEC17A"      "CLEC18C"      "CLVS2"       
## [16] "CYP7A1"       "DCAF10"       "DPPA2"        "EEPD1"        "ELFN1"       
## [21] "EML6"         "ENO3"         "FAM129C"      "FAM138B"      "FAM159A"     
## [26] "FAM81B"       "FATE1"        "FCER2"        "FCRL1"        "GABRD"       
## [31] "GRAPL"        "HPCAL1"       "ISL2"         "KCNMB3"       "LBX2"        
## [36] "LOC100130238" "LOC220930"    "LOC646498"    "LOC732275"    "LRP2BP"      
## [41] "MBL1P"        "MEIG1"        "MYF6"         "MYH8"         "NGLY1"       
## [46] "NKAIN4"       "NKX1-2"       "PAX5"         "PBX4"         "PCDHB12"     
## [51] "PGAM2"        "PI4K2B"       "PRCD"         "PRR4"         "PTPN6"       
## [56] "RASA4P"       "RFPL4B"       "RPP14"        "SIX2"         "SLC25A18"    
## [61] "TCF15"        "TCL1A"        "TMEM86B"      "TUBA8"        "UNC45B"      
## [66] "ZDHHC19"      "ZNF883"
train_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1), 
                               xdata.train[, names(coefs.v_orphan1)], 
                               ydata.train, 
                               plot.title = 'Train Set orphan Net, alpha=0.1', legend.outside = FALSE)
train_orphan01$pvalue
## [1] 6.802484e-09
test_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1), 
                              xdata.test[, names(coefs.v_orphan1)], 
                              ydata.test, 
                              plot.title = 'Test Set orphan Net, alpha=0.1', legend.outside = FALSE)
test_orphan01$pvalue
## [1] 0.06322569
all_orphan01<-separate2GroupsCox(as.vector(coefs.v_orphan1), 
                             xdata[, names(coefs.v_orphan1)], 
                             ydata %>% mutate(time = time / 365 * 12), 
                             plot.title = 'All data orphan Net, alpha=0.1', legend.outside = FALSE,
                             break.x.by = 12)

4.3.4 alpha=0.05

cv.fit_orphan05<-cv.glmOrphan(as.matrix(xdata.train),as.matrix(ydata.train), 
                               family = 'cox',foldid = my_foldid, network = "correlation", 
                               alpha = 0.05,
                               nlambda = 100,
                               #lambda.min.ratio = 10^(-2),
                               nfolds= params$nfolds, network.options =  networkOptions(cutoff = 0.001,                                                                   min.degree = 0.6))

coefs.v_orphan05 <- coef(cv.fit_orphan05, s = 'lambda.min')[,1] %>% { .[. != 0]}

fit.selected_orphan05 <- which(cv.fit_orphan05$glmnet.fit$beta[,which(cv.fit_orphan05$cvm == min(cv.fit_orphan05$cvm))] != 0)

length(fit.selected_orphan05)
## [1] 99
train_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05), 
                               xdata.train[, names(coefs.v_orphan05)], 
                               ydata.train, 
                               plot.title = 'Train Set orphan Net, alpha=0.05', legend.outside = FALSE)

test_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05), 
                              xdata.test[, names(coefs.v_orphan05)], 
                              ydata.test, 
                              plot.title = 'Test Set orphan Net, alpha=0.05', legend.outside = FALSE)

all_orphan05<-separate2GroupsCox(as.vector(coefs.v_orphan05), 
                             xdata[, names(coefs.v_orphan05)], 
                             ydata %>% mutate(time = time / 365 * 12), 
                             plot.title = 'All data orphan Net, alpha=0.05', legend.outside = FALSE,
                             break.x.by = 12)

4.3.5 Pvalues KM

alpha_points<-c(0.3,0.2,0.1,0.05)

pvalues_orphan_train<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue)

pvalues_orphan_test<-c(test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue)

pvalues_orphan_all<-c(all_orphan03$pvalue,all_orphan02$pvalue,all_orphan01$pvalue,all_orphan05$pvalue)
pvalues_orphan_values<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue,
                      test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue,
                      all_orphan03$pvalue,all_orphan02$pvalue,all_orphan01$pvalue,all_orphan05$pvalue)

pvalues_orphan_method<-c(rep("pvalues orphan train set",4),rep("pvalues orphan test set",4),rep("pvalues orphan all set",4))


pvalues_orphan<-data.frame(alpha_points,pvalues_orphan_values,pvalues_orphan_method)
p_pvalues_orphan<-ggplot(data = pvalues_orphan, aes(x = alpha_points, y = pvalues_orphan_values,color= pvalues_orphan_method)) + geom_line()+geom_point()+theme_minimal()
p_pvalues_orphan+ ylim(0, max(pvalues_orphan_values))

4.4 Final plot comparing all models - Figure 4

alpha_points<-c(0.3, 0.2,0.1,0.05)

#TCox (exp(-w^3))
pvalues_twexp2_values_test<-c(test_twii03$pvalue,test_twii02$pvalue,test_twii01$pvalue,test_twii05$pvalue)

#EN
pvalues_en_test<-c(test_en03$pvalue,test_en02$pvalue,test_en01$pvalue,test_en05$pvalue)

#Hub
pvalues_hub_test<-c(test_hub03$pvalue,test_hub02$pvalue,test_hub01$pvalue,test_hub05$pvalue)

#Orphan
pvalues_orphan_test<-c(test_orphan03$pvalue,test_orphan02$pvalue,test_orphan01$pvalue,test_orphan05$pvalue)

pvalues_test<-c(pvalues_twexp2_values_test, pvalues_en_test, pvalues_hub_test, pvalues_orphan_test)

Method <-c(rep("TCox",4), rep("EN",4), rep("HubCox",4), rep("OrphanCox",4))

pvalues_test_data<-data.frame(alpha_points,pvalues_test,Method)

p_pvalues_test<-ggplot(data = pvalues_test_data, aes(x = alpha_points, y = pvalues_test,color= Method)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_test+ ylim(0, max(pvalues_test))

alpha_points<-c(0.3, 0.2,0.1,0.05)

#TCox (exp(-w^3))
pvalues_twexp2_values_train<-c(train_twexp203$pvalue,train_twexp202$pvalue,train_twexp201$pvalue,train_twexp205$pvalue)

#EN
pvalues_en_train<-c(train_en03$pvalue,train_en02$pvalue,train_en01$pvalue,train_en05$pvalue)

#Hub
pvalues_hub_train<-c(train_hub03$pvalue,train_hub02$pvalue,train_hub01$pvalue,train_hub05$pvalue)

#Orphan
pvalues_orphan_train<-c(train_orphan03$pvalue,train_orphan02$pvalue,train_orphan01$pvalue,train_orphan05$pvalue)

pvalues_train<-c(pvalues_twexp2_values_train, pvalues_en_train, pvalues_hub_train, pvalues_orphan_train)

Method <-c(rep("TCox",4), rep("EN",4), rep("HubCox",4), rep("OrphanCox",4))

pvalues_train_data<-data.frame(alpha_points,pvalues_train,Method)

p_pvalues_train<-ggplot(data = pvalues_train_data, aes(x = alpha_points, y = pvalues_train,color= Method)) + geom_hline(yintercept=0.05) + geom_line()+geom_point()+theme_minimal()
p_pvalues_train+ ylim(0, max(pvalues_train))

5 CHAT tool

by all models

genes <- tumorf[,c("CYP7A1","FAM159A","LOC732275","CLDN9","LBX2","MEIG1","PAX5","NKAIN4","ZDHHC19","GRAPL","PCDHB12","EEPD1","HPCAL1","PGAM2","ZNF883","FAM138B","LOC646498","PRCD")]

geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))

genes selected only by:

EN

genes <- tumorf[,c("HOTAIR","GJA3","LOC283663","DNAI2","NELF","GUCA1B")]

geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))

glmSparseNet

genes <- tumorf[,c("CYGB","UNC13B","LIPT2","RFT1","BEND4","FAM24B","RASGRP2","SLFN11")]

geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))

TCox

genes <- tumorf[,c("ANKRD26P1","CARKD","IGLON5","OSTN","RAB20","TXNL4B","AOX2P","DCLK3","FCRL2","SEPT7P2","ASPHD1","COL19A1","DCP1A","FLJ16779","LOC100303728","PCDHA7","SNTG1","COX4I2","NXF2B","TAC3","C20orf106","LOC285780","OR2T5","TERF2IP","CAPN7","OSBPL3","TRIM67")]

geneNames(names(genes)) %>% { hallmarks(.$external_gene_name)$heatmap} + theme(title = element_text(size=9),axis.text=element_text(size=10), axis.title=element_text(size=8))

6 Differentially expressed genes

6.1 Early vs. advanced stage

clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ivb"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iva"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iv"] <- 4
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiic"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiib"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iiia"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iii"] <- 3
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iic"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iib"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage iia"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ii"] <- 2
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage ia"] <- 1
clinic$neoplasm_diseasestage[clinic$neoplasm_diseasestage == "stage i"] <- 1

pathology_stage <- as.data.frame(clinic$neoplasm_diseasestage)
pathology_stage$names <- rownames(clinic)
pathology_stage$names <- toupper(pathology_stage$names)  
pathology_stage$names <- chartr('.', '-', pathology_stage$names)
pathology_stage <- na.omit(pathology_stage)
row.names(pathology_stage) <- pathology_stage$names
pathology_stage <- as.data.frame(pathology_stage)
pathology_stage$`clinic$neoplasm_diseasestage` <- as.numeric(pathology_stage$`clinic$neoplasm_diseasestage`)

pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "1"] <- "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "2"] <-  "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "3"] <-  "early"
pathology_stage$`clinic$neoplasm_diseasestage`[pathology_stage$`clinic$neoplasm_diseasestage` == "4"] <- "advanced"



all_data = merge(tumorf,pathology_stage,by="row.names")
#rownames(all_data) <- all_data$submitter_id

xdata <- apply(as.matrix(all_data[,2:20502]),2,as.numeric)
xdata <- xdata[ ,- as.numeric(which(apply(xdata, 2, var) == 0))]
xdata <- t(xdata)

group <- all_data$`clinic$neoplasm_diseasestage`
y <- DGEList(counts=xdata,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)

To perform quasi-likelihood F-tests (The quasi-likelihood method is highly recommended for differential expression analyses of bulk RNA-seq data as it gives stricter error rate control by accounting for the uncertainty in dispersion estimation)

fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  groupearly 
##                   logFC     logCPM        F       PValue          FDR
## XAGE1D        -7.278711 -1.2348223 608.8796 5.266627e-71 8.974859e-67
## CT45A4        -5.190992 -2.8256847 595.3811 4.237922e-70 3.546180e-66
## CT45A3        -7.247917 -1.5793655 592.8965 6.242909e-70 3.546180e-66
## DKFZp686A1627 -5.847695 -2.5227528 590.9097 8.516663e-70 3.628311e-66
## DCAF4L2       -6.633494 -2.0817666 588.1865 1.305114e-69 4.448089e-66
## MAGEA10       -6.258910 -2.1003651 551.9612 4.360032e-67 1.238322e-63
## CXorf49B      -8.160416  0.1161680 505.5592 1.098395e-63 2.673965e-60
## FOXR2         -4.804525 -2.8724963 477.0349 1.716385e-61 3.656115e-58
## CT45A1        -7.280180 -0.6235422 426.2823 2.264051e-57 4.286855e-54
## TUBA3C        -4.771858 -2.5453413 357.1792 3.014495e-51 5.137001e-48
summary(decideTests(qlf))
##        groupearly
## Down          602
## NotSig      16079
## Up            360
a <- qlf$table
a <- a[a$PValue < 0.05, ] 

write.table(a, "tabela_genes_degs_earlyvsadv.csv", append = TRUE)

Plot log-fold change against log-counts per million, with DE genes highlighted:

plotMD(qlf)
abline(h=c(-1, 1), col="blue")

6.2 Tumor vs. normal

# tumor 373
# normal 60
tumorf <- t(tumorf)
normalf <- t(normalf)
all = merge(tumorf,normalf,by="row.names")
rownames(all) <- all$Row.names

all <- all[,-1]
all <- t(all)
all <- as.data.frame(all)
all$type[1:433] <- "normal"
all$type[1:373] <- "tumor"


xdatadeg <- apply(as.matrix(all[,1:20501]),2,as.numeric)
xdatadeg <- xdatadeg[,apply(xdatadeg!=0, 2, all)]
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)

To perform quasi-likelihood F-tests (The quasi-likelihood method is highly recommended for differential expression analyses of bulk RNA-seq data as it gives stricter error rate control by accounting for the uncertainty in dispersion estimation)

fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##             logFC    logCPM         F        PValue           FDR
## UGP2    -1.907729 7.2692269 1066.7289 4.101999e-119 5.450736e-115
## CLEC3B  -3.830019 3.9500075  880.0177 1.472141e-106 9.780906e-103
## CUBN    -5.121942 1.8001048  864.0506 2.105275e-105 9.324966e-102
## A1BG    -4.951522 2.7322544  854.6489 1.023940e-104 3.401530e-101
## PHLPP2  -2.385443 5.0304506  762.5613  1.040458e-97  2.765122e-94
## SULT1A2 -3.916172 2.2476161  756.2584  3.283824e-97  7.272575e-94
## KCNIP4  -2.696053 0.7653609  721.3317  2.144063e-94  4.070044e-91
## PEX26   -1.855932 5.3462240  694.9992  3.238848e-92  5.379727e-89
## ZNF575  -2.160721 1.9742580  687.2304  1.455565e-91  2.149060e-88
## PLCD1   -2.225580 4.6921304  668.6268  5.552143e-90  7.377688e-87
summary(decideTests(qlf))
##        grouptumor
## Down         4639
## NotSig       3056
## Up           5593
b <- qlf$table
b <- b[a$PValue < 0.05, ] 

write.table(b, "tabela_genes_degs_tumorvsnorm.csv", append = TRUE)

Plot log-fold change against log-counts per million, with DE genes highlighted:

plotMD(qlf)
abline(h=c(-1, 1), col="blue")

6.3 Tumor vs. normal genes selected by all models

# tumor 373
# normal 60

few <- all[,c("CYP7A1",
"FAM159A",
"LOC732275",
"CLDN9",
"LBX2",
"MEIG1",
"PAX5",
"NKAIN4",
"ZDHHC19",
"GRAPL",
"PCDHB12",
"EEPD1",
"HPCAL1",
"PGAM2",
"ZNF883",
"FAM138B",
"LOC646498",
"PRCD","type")]

xdatadeg <- apply(as.matrix(few[,1:18]),2,as.numeric)
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##              logFC   logCPM        F       PValue          FDR
## PRCD    -0.9164964 11.52905 65.65138 5.480847e-15 9.865525e-14
## LBX2     1.1617787 12.46498 55.69423 4.644110e-13 4.179699e-12
## EEPD1    0.8073150 18.30913 51.18356 3.589396e-12 2.153638e-11
## PAX5    -1.8627698 13.27415 43.30143 1.350442e-10 6.076989e-10
## ZDHHC19 -1.2580156 10.27088 40.31242 5.447264e-10 1.961015e-09
## FAM159A -1.0008112 11.37126 30.59998 5.480797e-08 1.644239e-07
## CLDN9    1.3526544 14.05259 27.23531 2.793765e-07 7.183967e-07
## CYP7A1  -1.5118019 10.43878 26.68755 3.648163e-07 8.208366e-07
## PGAM2   -0.4782448 12.93011 14.72795 1.425790e-04 2.851580e-04
## NKAIN4  -0.7753835 11.82751 10.16699 1.532944e-03 2.759300e-03
summary(decideTests(qlf))
##        grouptumor
## Down            9
## NotSig          6
## Up              3
decideTests(qlf)
## TestResults matrix
##           grouptumor
## CYP7A1            -1
## FAM159A           -1
## LOC732275          0
## CLDN9              1
## LBX2               1
## 13 more rows ...
plotMD(qlf)
abline(h=c(-1, 1), col="blue")

6.4 Tumor vs. normal genes selected by TCox

# tumor 373
# normal 60

few <- all[,c("ANKRD26P1",
"CARKD",
"IGLON5",
"OSTN",
"RAB20",
"TXNL4B",
"AOX2P",
"DCLK3",
"FCRL2",
"SEPT7P2",
"ASPHD1",
"COL19A1",
"DCP1A",
"FLJ16779",
"LOC100303728",
"PCDHA7",
"SNTG1",
"COX4I2",
"NXF2B",
"TAC3",
"C20orf106",
"LOC285780",
"OR2T5",
"TERF2IP",
"CAPN7",
"OSBPL3",
"TRIM67",
"type")]

xdatadeg <- apply(as.matrix(few[,1:27]),2,as.numeric)
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##                   logFC    logCPM         F       PValue          FDR
## OSBPL3        1.4432879 17.148381 256.00019 1.117076e-45 3.016104e-44
## COL19A1      -2.7128076  9.724384 221.93491 7.074003e-41 9.549904e-40
## ASPHD1        2.0497160 14.607284 108.02651 9.304629e-23 7.392128e-22
## FCRL2        -2.3175345 11.665128 107.62380 1.095130e-22 7.392128e-22
## LOC100303728 -0.8392998 11.259928  88.68043 2.703291e-19 1.459777e-18
## DCLK3         0.9927463 11.276039  42.95900 1.579131e-10 7.106088e-10
## SEPT7P2       0.3614066 14.166342  26.53498 3.925486e-07 1.514116e-06
## NXF2B         1.7926739  8.549050  24.11833 1.282660e-06 4.328979e-06
## TXNL4B        0.3175827 15.624186  23.63773 1.625180e-06 4.875539e-06
## FLJ16779      1.3952915 11.111728  22.81463 2.439858e-06 6.587616e-06
summary(decideTests(qlf))
##        grouptumor
## Down            7
## NotSig         11
## Up              9
decideTests(qlf)
## TestResults matrix
##           grouptumor
## ANKRD26P1          1
## CARKD              0
## IGLON5             0
## OSTN              -1
## RAB20              0
## 22 more rows ...
qlf$table
##                     logFC    logCPM            F       PValue
## ANKRD26P1     0.855862474  8.457990  13.08739072 3.067562e-03
## CARKD         0.014862117 17.642616   0.05936445 8.076177e-01
## IGLON5        0.436622984 10.830858   3.70631249 5.485624e-02
## OSTN         -0.970454241  8.339538 161.97311151 1.157546e-05
## RAB20         0.011329195 17.239373   0.02430251 8.761897e-01
## TXNL4B        0.317582692 15.624186  23.63772694 1.625180e-06
## AOX2P         0.180206681  8.370341   1.51393915 5.112444e-01
## DCLK3         0.992746286 11.276039  42.95899786 1.579131e-10
## FCRL2        -2.317534452 11.665128 107.62379739 1.095130e-22
## SEPT7P2       0.361406595 14.166342  26.53497736 3.925486e-07
## ASPHD1        2.049716038 14.607284 108.02651292 9.304629e-23
## COL19A1      -2.712807613  9.724384 221.93491411 7.074003e-41
## DCP1A         0.191739065 17.009783   8.97049802 2.899824e-03
## FLJ16779      1.395291532 11.111728  22.81462741 2.439858e-06
## LOC100303728 -0.839299790 11.259928  88.68043132 2.703291e-19
## PCDHA7       -0.329656385 10.222402   2.48335938 1.157805e-01
## SNTG1        -0.223189775  8.346860   2.27166392 4.282530e-01
## COX4I2        0.094096864 10.866250   0.38219930 5.367509e-01
## NXF2B         1.792673898  8.549050  24.11833351 1.282660e-06
## TAC3         -0.695457641 11.132690  11.51492294 7.533715e-04
## C20orf106     0.151903163  8.817470   0.67892262 4.104078e-01
## LOC285780    -0.840445757  8.409863  24.66504888 2.171565e-03
## OR2T5         0.184824136  8.343255   3.47326852 3.940183e-01
## TERF2IP      -0.040738071 17.329669   0.76677326 3.816986e-01
## CAPN7         0.008884825 16.667587   0.02615964 8.715859e-01
## OSBPL3        1.443287906 17.148381 256.00018660 1.117076e-45
## TRIM67       -0.493439909 10.085970  10.99062251 9.921363e-04
plotMD(qlf)
abline(h=c(-1, 1), col="blue")

## Tumor vs. normal genes selected by EN

# tumor 373
# normal 60

few <- all[,c("HOTAIR", "GJA3", "LOC283663", "DNAI2", "NELF", "GUCA1B",
"type")]

xdatadeg <- apply(as.matrix(few[,1:6]),2,as.numeric)
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##                logFC   logCPM         F       PValue          FDR
## LOC283663 -1.1438086 15.16075 62.409945 2.305938e-14 1.383563e-13
## GJA3       1.7383317 13.14146 23.949152 1.395968e-06 4.187905e-06
## HOTAIR     1.6202151 12.89395 10.495759 1.288313e-03 2.576627e-03
## NELF       0.1682085 19.93805  5.095770 2.447962e-02 3.671944e-02
## DNAI2     -0.6031510 10.22343  4.674232 3.116320e-02 3.739584e-02
## GUCA1B    -0.2152865 13.39286  1.704419 1.924014e-01 1.924014e-01
summary(decideTests(qlf))
##        grouptumor
## Down            2
## NotSig          1
## Up              3
decideTests(qlf)
## TestResults matrix
##           grouptumor
## HOTAIR             1
## GJA3               1
## LOC283663         -1
## DNAI2             -1
## NELF               1
## GUCA1B             0
qlf$table
##                logFC   logCPM         F       PValue
## HOTAIR     1.6202151 12.89395 10.495759 1.288313e-03
## GJA3       1.7383317 13.14146 23.949152 1.395968e-06
## LOC283663 -1.1438086 15.16075 62.409945 2.305938e-14
## DNAI2     -0.6031510 10.22343  4.674232 3.116320e-02
## NELF       0.1682085 19.93805  5.095770 2.447962e-02
## GUCA1B    -0.2152865 13.39286  1.704419 1.924014e-01
plotMD(qlf)
abline(h=c(-1, 1), col="blue")

## Tumor vs. normal genes selected by HUB

# tumor 373
# normal 60

few <- all[,c("CYGB", "UNC13B", "LIPT2", "RFT1", "BEND4", "FAM24B", "RASGRP2", "SLFN11",
"type")]

xdatadeg <- apply(as.matrix(few[,1:8]),2,as.numeric)
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##               logFC   logCPM           F       PValue          FDR
## BEND4   -2.23226211 11.17524 178.4118594 2.161949e-34 1.729559e-33
## RASGRP2 -1.50978636 14.27713 100.7729807 1.739134e-21 6.956534e-21
## RFT1     0.85082359 17.73505  97.8953741 5.687952e-21 1.516787e-20
## FAM24B   1.31414753 13.51713  46.2870925 3.356109e-11 6.712218e-11
## LIPT2    0.64489476 13.29540  34.3161182 9.196043e-09 1.471367e-08
## CYGB    -0.31693896 17.00330   9.1675519 2.608502e-03 3.478002e-03
## UNC13B   0.05581413 19.03481   0.5044560 4.779256e-01 5.462007e-01
## SLFN11   0.06989973 16.59031   0.2387515 6.253532e-01 6.253532e-01
summary(decideTests(qlf))
##        grouptumor
## Down            3
## NotSig          2
## Up              3
decideTests(qlf)
## TestResults matrix
##         grouptumor
## CYGB            -1
## UNC13B           0
## LIPT2            1
## RFT1             1
## BEND4           -1
## FAM24B           1
## RASGRP2         -1
## SLFN11           0
qlf$table
##               logFC   logCPM           F       PValue
## CYGB    -0.31693896 17.00330   9.1675519 2.608502e-03
## UNC13B   0.05581413 19.03481   0.5044560 4.779256e-01
## LIPT2    0.64489476 13.29540  34.3161182 9.196043e-09
## RFT1     0.85082359 17.73505  97.8953741 5.687952e-21
## BEND4   -2.23226211 11.17524 178.4118594 2.161949e-34
## FAM24B   1.31414753 13.51713  46.2870925 3.356109e-11
## RASGRP2 -1.50978636 14.27713 100.7729807 1.739134e-21
## SLFN11   0.06989973 16.59031   0.2387515 6.253532e-01
plotMD(qlf)
abline(h=c(-1, 1), col="blue")

## Tumor vs. normal genes selected by all models + TCox + EN + HUB

# tumor 373
# normal 60

few <- all[,c("CYP7A1",
"FAM159A",
"LOC732275",
"CLDN9",
"LBX2",
"MEIG1",
"PAX5",
"NKAIN4",
"ZDHHC19",
"GRAPL",
"PCDHB12",
"EEPD1",
"HPCAL1",
"PGAM2",
"ZNF883",
"FAM138B",
"LOC646498",
"PRCD",
"ANKRD26P1",
"CARKD",
"IGLON5",
"OSTN",
"RAB20",
"TXNL4B",
"AOX2P",
"DCLK3",
"FCRL2",
"SEPT7P2",
"ASPHD1",
"COL19A1",
"DCP1A",
"FLJ16779",
"LOC100303728",
"PCDHA7",
"SNTG1",
"COX4I2",
"NXF2B",
"TAC3",
"C20orf106",
"LOC285780",
"OR2T5",
"TERF2IP",
"CAPN7",
"OSBPL3",
"TRIM67","HOTAIR", "GJA3", "LOC283663", "DNAI2", "NELF", "GUCA1B", "CYGB", "UNC13B", "LIPT2", "RFT1", "BEND4", "FAM24B", "RASGRP2", "SLFN11",
"type")]

xdatadeg <- apply(as.matrix(few[,1:59]),2,as.numeric)
xdatadeg <- t(xdatadeg)

group <- all$type
y <- DGEList(counts=xdatadeg,group=group)
#remove genes that are unexpressed or very lowly expressed in the samples
keep <- rowSums(cpm(y)>1) >= 2
y <- y[keep, , keep.lib.sizes=FALSE]

y <- calcNormFactors(y)
# plotMDS(y)

design <- model.matrix(~group)
y <- estimateDisp(y,design)
plotBCV(y)

# y$samples
# levels(y$samples$group)
fit <- glmQLFit(y, design)

qlf <- glmQLFTest(fit,coef=2)


# lrt <- glmLRT(fit)
topTags(qlf)
## Coefficient:  grouptumor 
##                   logFC    logCPM         F       PValue          FDR
## COL19A1      -2.8179030  8.595589 225.37179 2.304581e-41 1.359703e-39
## BEND4        -2.7094111  9.211264 212.36306 1.786624e-39 5.270542e-38
## OSBPL3        1.3627386 16.003681 202.04329 6.004244e-38 1.180835e-36
## RASGRP2      -1.9542075 12.288638 155.30003 1.052623e-30 1.552620e-29
## FCRL2        -2.4362515 10.534432 117.35523 2.225992e-24 2.626671e-23
## LOC100303728 -0.9520623 10.116237 112.04371 1.855834e-23 1.824904e-22
## ASPHD1        1.9357883 13.402640 106.51526 1.726223e-22 1.454959e-21
## PRCD         -1.0156103  8.964546 100.15023 2.317828e-21 1.709398e-20
## CYGB         -0.8826620 14.901394  74.53855 1.126285e-16 7.383422e-16
## LBX2          1.0930513  9.907015  50.89707 4.081197e-12 2.209211e-11
summary(decideTests(qlf))
##        grouptumor
## Down           23
## NotSig         20
## Up             16
decideTests(qlf)
## TestResults matrix
##           grouptumor
## CYP7A1            -1
## FAM159A           -1
## LOC732275          0
## CLDN9              1
## LBX2               1
## 54 more rows ...
qlf$table
##                    logFC    logCPM            F       PValue
## CYP7A1       -1.63334908  7.921844  32.16798004 2.578170e-08
## FAM159A      -1.09238629  8.819147  39.31712906 8.676121e-10
## LOC732275    -0.05423451  7.178948   0.37410537 8.073985e-01
## CLDN9         1.30057881 11.480171  25.95448245 5.214578e-07
## LBX2          1.09305129  9.907015  50.89707239 4.081197e-12
## MEIG1         0.14003274  7.587517   0.42857904 5.645820e-01
## PAX5         -1.93373481 10.738603  48.27667935 1.353926e-11
## NKAIN4       -0.25267343  9.247261   1.06093696 3.035725e-01
## ZDHHC19      -1.37161655  7.746099  50.87693263 4.118867e-12
## GRAPL        -0.11850276  8.810476   0.40196820 5.264069e-01
## PCDHB12      -0.50691866 10.817809  11.48211297 7.665154e-04
## EEPD1         0.71117278 15.729398  43.28256382 1.360090e-10
## HPCAL1       -0.02031115 16.800375   0.05475514 8.150962e-01
## PGAM2        -0.52915848 10.397784  17.09801459 4.259255e-05
## ZNF883       -0.03532704 10.150584   0.01805584 8.931704e-01
## FAM138B      -0.87815262  7.205567  41.76825770 1.429707e-03
## LOC646498    -0.12450154  7.169595   4.30151902 5.269022e-01
## PRCD         -1.01561035  8.964546 100.15023322 2.317828e-21
## ANKRD26P1     0.80527058  7.293802  11.42680716 6.184645e-03
## CARKD        -0.08456063 16.510355   1.46748347 2.263991e-01
## IGLON5        0.31485549  9.668320   1.92433440 1.660881e-01
## OSTN         -1.04005630  7.179727 168.37847644 6.760930e-06
## RAB20        -0.09936022 16.098856   1.66212643 1.979998e-01
## TXNL4B        0.20259027 14.477214   8.61151296 3.516813e-03
## AOX2P         0.14974118  7.210911   0.93978526 6.042511e-01
## DCLK3         0.90512959 10.136943  33.43675229 1.405100e-08
## FCRL2        -2.43625149 10.534432 117.35523476 2.225992e-24
## SEPT7P2       0.26240262 13.022431  12.21257985 5.232448e-04
## ASPHD1        1.93578827 13.402640 106.51526030 1.726223e-22
## COL19A1      -2.81790297  8.595589 225.37178995 2.304581e-41
## DCP1A         0.07496035 15.864870   1.21140171 2.716618e-01
## FLJ16779      1.34530173  9.928667  21.26377598 5.266133e-06
## LOC100303728 -0.95206229 10.116237 112.04371068 1.855834e-23
## PCDHA7       -0.45736970  9.075902   4.63035858 3.195931e-02
## SNTG1        -0.28313781  7.186221   3.21741533 3.587198e-01
## COX4I2       -0.05418245  9.688488   0.13636363 7.121024e-01
## NXF2B         1.75559879  7.383321  22.81717325 2.437667e-06
## TAC3         -0.87441618  9.987477  18.26749338 2.359190e-05
## C20orf106     0.03835324  7.657908   0.04571520 8.307943e-01
## LOC285780    -0.91060992  7.250830  27.54037455 1.003767e-03
## OR2T5         0.17011766  7.181836   2.84015052 4.455165e-01
## TERF2IP      -0.16399165 16.183656  10.63005518 1.199856e-03
## CAPN7        -0.11227073 15.530615   3.23536145 7.275568e-02
## OSBPL3        1.36273864 16.003681 202.04329084 6.004244e-38
## TRIM67       -0.62128077  8.934436  17.10274433 4.249062e-05
## HOTAIR        2.22595482  9.930021  20.66461442 7.096895e-06
## GJA3          2.26569881 10.076348  43.94657962 9.991203e-11
## LOC283663    -0.70462996 12.043082  28.10877140 1.826031e-07
## DNAI2        -0.22683552  7.371323   0.82030724 4.191321e-01
## NELF          0.77260219 17.089483  42.42859172 2.023825e-10
## GUCA1B        0.29896656 10.380007   3.47556637 6.295300e-02
## CYGB         -0.88266200 14.901394  74.53854865 1.126285e-16
## UNC13B       -0.44640654 16.957731  22.81745477 2.437328e-06
## LIPT2         0.11119801 11.132369   1.21413377 2.711227e-01
## RFT1          0.35694181 15.549585  22.91808355 2.319064e-06
## BEND4        -2.70941108  9.211264 212.36305755 1.786624e-39
## FAM24B        0.81794341 11.312068  20.17526923 9.060076e-06
## RASGRP2      -1.95420751 12.288638 155.30002946 1.052623e-30
## SLFN11       -0.44088416 14.516353   8.80868002 3.163082e-03
plotMD(qlf)
abline(h=c(-1, 1), col="blue")

# Survival with genes selected

ANKRD26P1

tumorf <- as.data.frame(t(tumorf))
gene <- as.data.frame(tumorf$ANKRD26P1)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$ANKRD26P1` <= median(gene$`tumorf$ANKRD26P1`)] <- "low"
gene$type[gene$`tumorf$ANKRD26P1` > median(gene$`tumorf$ANKRD26P1`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE, title = "ANKRD26P1")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high  97       20     19.3    0.0275    0.0379
## path_stage=low  260       52     52.7    0.0101    0.0379
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.8

CARKD

gene <- as.data.frame(tumorf$CARKD)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$CARKD` <= median(gene$`tumorf$CARKD`)] <- "low"
gene$type[gene$`tumorf$CARKD` > median(gene$`tumorf$CARKD`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "CARKD")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181       39     36.3     0.195     0.394
## path_stage=low  176       33     35.7     0.199     0.394
## 
##  Chisq= 0.4  on 1 degrees of freedom, p= 0.5

IGLON5

gene <- as.data.frame(tumorf$IGLON5)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$IGLON5` <= median(gene$`tumorf$IGLON5`)] <- "low"
gene$type[gene$`tumorf$IGLON5` > median(gene$`tumorf$IGLON5`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "IGLON5")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182       41     31.8      2.66      4.81
## path_stage=low  175       31     40.2      2.11      4.81
## 
##  Chisq= 4.8  on 1 degrees of freedom, p= 0.03

OSTN

gene <- as.data.frame(tumorf$OSTN)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$OSTN` <= median(gene$`tumorf$OSTN`)] <- "low"
gene$type[gene$`tumorf$OSTN` > median(gene$`tumorf$OSTN`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OSTN")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high   8        5    0.692     26.79      27.3
## path_stage=low  349       67   71.308      0.26      27.3
## 
##  Chisq= 27.3  on 1 degrees of freedom, p= 2e-07

RAB20

gene <- as.data.frame(tumorf$RAB20)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$RAB20` <= median(gene$`tumorf$RAB20`)] <- "low"
gene$type[gene$`tumorf$RAB20` > median(gene$`tumorf$RAB20`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "RAB20")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181       39     36.8     0.129     0.265
## path_stage=low  176       33     35.2     0.135     0.265
## 
##  Chisq= 0.3  on 1 degrees of freedom, p= 0.6

TXNL4B

gene <- as.data.frame(tumorf$TXNL4B)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$TXNL4B` <= median(gene$`tumorf$TXNL4B`)] <- "low"
gene$type[gene$`tumorf$TXNL4B` > median(gene$`tumorf$TXNL4B`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TXNL4B")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179       40     37.9     0.120     0.255
## path_stage=low  178       32     34.1     0.133     0.255
## 
##  Chisq= 0.3  on 1 degrees of freedom, p= 0.6

AOX2P

gene <- as.data.frame(tumorf$AOX2P)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$AOX2P` <= median(gene$`tumorf$AOX2P`)] <- "low"
gene$type[gene$`tumorf$AOX2P` > median(gene$`tumorf$AOX2P`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "AOX2P")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high  47       11     7.03     2.247      2.53
## path_stage=low  310       61    64.97     0.243      2.53
## 
##  Chisq= 2.5  on 1 degrees of freedom, p= 0.1

DCLK3

gene <- as.data.frame(tumorf$DCLK3)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$DCLK3` <= median(gene$`tumorf$DCLK3`)] <- "low"
gene$type[gene$`tumorf$DCLK3` > median(gene$`tumorf$DCLK3`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "DCLK3")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177       39       35     0.451     0.881
## path_stage=low  180       33       37     0.427     0.881
## 
##  Chisq= 0.9  on 1 degrees of freedom, p= 0.3

FCRL2

gene <- as.data.frame(tumorf$FCRL2)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$FCRL2` <= median(gene$`tumorf$FCRL2`)] <- "low"
gene$type[gene$`tumorf$FCRL2` > median(gene$`tumorf$FCRL2`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "FCRL2")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182       31     32.2    0.0430    0.0787
## path_stage=low  175       41     39.8    0.0347    0.0787
## 
##  Chisq= 0.1  on 1 degrees of freedom, p= 0.8

SEPT7P2

gene <- as.data.frame(tumorf$SEPT7P2)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$SEPT7P2` <= median(gene$`tumorf$SEPT7P2`)] <- "low"
gene$type[gene$`tumorf$SEPT7P2` > median(gene$`tumorf$SEPT7P2`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "SEPT7P2")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177       43     35.5      1.56      3.12
## path_stage=low  180       29     36.5      1.52      3.12
## 
##  Chisq= 3.1  on 1 degrees of freedom, p= 0.08

ASPHD1

gene <- as.data.frame(tumorf$ASPHD1)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$ASPHD1` <= median(gene$`tumorf$ASPHD1`)] <- "low"
gene$type[gene$`tumorf$ASPHD1` > median(gene$`tumorf$ASPHD1`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "ASPHD1")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178       43     35.2      1.72      3.38
## path_stage=low  179       29     36.8      1.65      3.38
## 
##  Chisq= 3.4  on 1 degrees of freedom, p= 0.07

COL19A1

gene <- as.data.frame(tumorf$COL19A1)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$COL19A1` <= median(gene$`tumorf$COL19A1`)] <- "low"
gene$type[gene$`tumorf$COL19A1` > median(gene$`tumorf$COL19A1`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "COL19A1")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 182       30     32.5     0.193     0.357
## path_stage=low  175       42     39.5     0.159     0.357
## 
##  Chisq= 0.4  on 1 degrees of freedom, p= 0.6

DCP1A

gene <- as.data.frame(tumorf$DCP1A)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$DCP1A` <= median(gene$`tumorf$DCP1A`)] <- "low"
gene$type[gene$`tumorf$DCP1A` > median(gene$`tumorf$DCP1A`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "DCP1A")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 175       27     35.1      1.89      3.71
## path_stage=low  182       45     36.9      1.80      3.71
## 
##  Chisq= 3.7  on 1 degrees of freedom, p= 0.05

FLJ16779

gene <- as.data.frame(tumorf$FLJ16779)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$FLJ16779` <= median(gene$`tumorf$FLJ16779`)] <- "low"
gene$type[gene$`tumorf$FLJ16779` > median(gene$`tumorf$FLJ16779`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "FLJ16779")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 177       41       32      2.54       4.6
## path_stage=low  180       31       40      2.03       4.6
## 
##  Chisq= 4.6  on 1 degrees of freedom, p= 0.03

LOC100303728

gene <- as.data.frame(tumorf$LOC100303728)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$LOC100303728` <= median(gene$`tumorf$LOC100303728`)] <- "low"
gene$type[gene$`tumorf$LOC100303728` > median(gene$`tumorf$LOC100303728`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "LOC100303728")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178       47     37.9      2.17       4.6
## path_stage=low  179       25     34.1      2.41       4.6
## 
##  Chisq= 4.6  on 1 degrees of freedom, p= 0.03

PCDHA7

gene <- as.data.frame(tumorf$PCDHA7)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$PCDHA7` <= median(gene$`tumorf$PCDHA7`)] <- "low"
gene$type[gene$`tumorf$PCDHA7` > median(gene$`tumorf$PCDHA7`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "PCDHA7")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179       38     34.9     0.268     0.522
## path_stage=low  178       34     37.1     0.253     0.522
## 
##  Chisq= 0.5  on 1 degrees of freedom, p= 0.5

SNTG1

gene <- as.data.frame(tumorf$SNTG1)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$SNTG1` <= median(gene$`tumorf$SNTG1`)] <- "low"
gene$type[gene$`tumorf$SNTG1` > median(gene$`tumorf$SNTG1`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "SNTG1")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high   7        3    0.702    7.5217      7.65
## path_stage=low  350       69   71.298    0.0741      7.65
## 
##  Chisq= 7.6  on 1 degrees of freedom, p= 0.006

COX4I2

gene <- as.data.frame(tumorf$COX4I2)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$COX4I2` <= median(gene$`tumorf$COX4I2`)] <- "low"
gene$type[gene$`tumorf$COX4I2` > median(gene$`tumorf$COX4I2`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "COX4I2")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179       44     31.7      4.75      8.57
## path_stage=low  178       28     40.3      3.74      8.57
## 
##  Chisq= 8.6  on 1 degrees of freedom, p= 0.003

NXF2B

gene <- as.data.frame(tumorf$NXF2B)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$NXF2B` <= median(gene$`tumorf$NXF2B`)] <- "low"
gene$type[gene$`tumorf$NXF2B` > median(gene$`tumorf$NXF2B`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)
ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "NXF2B")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high  36       10     6.64     1.695      1.88
## path_stage=low  321       62    65.36     0.172      1.88
## 
##  Chisq= 1.9  on 1 degrees of freedom, p= 0.2

TAC3

gene <- as.data.frame(tumorf$TAC3)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$TAC3` <= median(gene$`tumorf$TAC3`)] <- "low"
gene$type[gene$`tumorf$TAC3` > median(gene$`tumorf$TAC3`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TAC3")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 176       37     37.2   0.00104   0.00215
## path_stage=low  181       35     34.8   0.00111   0.00215
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1

C20orf106

gene <- as.data.frame(tumorf$C20orf106)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$C20orf106` <= median(gene$`tumorf$C20orf106`)] <- "low"
gene$type[gene$`tumorf$C20orf106` > median(gene$`tumorf$C20orf106`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "C20orf106")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 175       41     30.5      3.62      6.38
## path_stage=low  182       31     41.5      2.66      6.38
## 
##  Chisq= 6.4  on 1 degrees of freedom, p= 0.01

LOC285780

gene <- as.data.frame(tumorf$LOC285780)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$LOC285780` <= median(gene$`tumorf$LOC285780`)] <- "low"
gene$type[gene$`tumorf$LOC285780` > median(gene$`tumorf$LOC285780`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "LOC285780")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high  65       18     11.3     3.930      4.68
## path_stage=low  292       54     60.7     0.734      4.68
## 
##  Chisq= 4.7  on 1 degrees of freedom, p= 0.03

OR2T5

gene <- as.data.frame(tumorf$OR2T5)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$OR2T5` <= median(gene$`tumorf$OR2T5`)] <- "low"
gene$type[gene$`tumorf$OR2T5` > median(gene$`tumorf$OR2T5`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OR2T5")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high  15        5     2.32     3.107      3.24
## path_stage=low  342       67    69.68     0.103      3.24
## 
##  Chisq= 3.2  on 1 degrees of freedom, p= 0.07

TERF2IP

gene <- as.data.frame(tumorf$TERF2IP)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$TERF2IP` <= median(gene$`tumorf$TERF2IP`)] <- "low"
gene$type[gene$`tumorf$TERF2IP` > median(gene$`tumorf$TERF2IP`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)



plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TERF2IP")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 181       45     34.5      3.18      6.12
## path_stage=low  176       27     37.5      2.93      6.12
## 
##  Chisq= 6.1  on 1 degrees of freedom, p= 0.01

CAPN7

gene <- as.data.frame(tumorf$CAPN7)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$CAPN7` <= median(gene$`tumorf$CAPN7`)] <- "low"
gene$type[gene$`tumorf$CAPN7` > median(gene$`tumorf$CAPN7`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "CAPN7")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 178       27     33.5      1.25      2.35
## path_stage=low  179       45     38.5      1.09      2.35
## 
##  Chisq= 2.4  on 1 degrees of freedom, p= 0.1

OSBPL3

gene <- as.data.frame(tumorf$OSBPL3)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$OSBPL3` <= median(gene$`tumorf$OSBPL3`)] <- "low"
gene$type[gene$`tumorf$OSBPL3` > median(gene$`tumorf$OSBPL3`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "OSBPL3")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 176       43     33.1      2.97      5.52
## path_stage=low  181       29     38.9      2.53      5.52
## 
##  Chisq= 5.5  on 1 degrees of freedom, p= 0.02

TRIM67

gene <- as.data.frame(tumorf$TRIM67)
gene$rownames <- rownames(tumorf)

gene$type[gene$`tumorf$TRIM67` <= median(gene$`tumorf$TRIM67`)] <- "low"
gene$type[gene$`tumorf$TRIM67` > median(gene$`tumorf$TRIM67`)] <- "high"

rownames(gene) <- gene$rownames

pathology_stage <- merge(datasurvf, gene, by="row.names")

time <- as.numeric(pathology_stage$time)
status <- pathology_stage$status
path_stage <- pathology_stage$type


# Surv object
kirc.surv = Surv(time,status == 1)
#kirc.surv 
# +did not died


# Kaplan-Meier estimates
fitkir <- survfit(Surv(as.numeric(time,status)) ~ path_stage)
#fitkir
#summary(fitkir)

plot(fitkir)

ggsurvplot(fitkir, data = pathology_stage, pval = TRUE,title = "TRIM671")

# the log/rank test
surv.stage <- survdiff(Surv(time,status==1)~path_stage)
surv.stage
## Call:
## survdiff(formula = Surv(time, status == 1) ~ path_stage)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## path_stage=high 179       42     36.5     0.826      1.68
## path_stage=low  178       30     35.5     0.849      1.68
## 
##  Chisq= 1.7  on 1 degrees of freedom, p= 0.2